| author | Ingo Molnar <mingo@elte.hu> | 2010-08-31 03:45:21 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2010-08-31 03:45:46 -0400 |
| commit | daab7fc734a53fdeaf844b7c03053118ad1769da (patch) | |
| tree | 575deb3cdcc6dda562acaed6f7c29bc81ae01cf2 /mm/vmscan.c | |
| parent | 774ea0bcb27f57b6fd521b3b6c43237782fed4b9 (diff) | |
| parent | 2bfc96a127bc1cc94d26bfaa40159966064f9c8c (diff) | |
Merge commit 'v2.6.36-rc3' into x86/memblock
Conflicts:
	arch/x86/kernel/trampoline.c
	mm/memblock.c
Merge reason: Resolve the conflicts, update to latest upstream.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	548
1 file changed, 305 insertions, 243 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b94fe1b3da43..c391c320dbaf 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -48,6 +48,9 @@
 
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/vmscan.h>
+
 struct scan_control {
 	/* Incremented by the number of inactive pages that were scanned */
 	unsigned long nr_scanned;
@@ -398,6 +401,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 			/* synchronous write or broken a_ops? */
 			ClearPageReclaim(page);
 		}
+		trace_mm_vmscan_writepage(page,
+			trace_reclaim_flags(page, sync_writeback));
 		inc_zone_page_state(page, NR_VMSCAN_WRITE);
 		return PAGE_SUCCESS;
 	}
@@ -617,6 +622,24 @@ static enum page_references page_check_references(struct page *page,
 	return PAGEREF_RECLAIM;
 }
 
+static noinline_for_stack void free_page_list(struct list_head *free_pages)
+{
+	struct pagevec freed_pvec;
+	struct page *page, *tmp;
+
+	pagevec_init(&freed_pvec, 1);
+
+	list_for_each_entry_safe(page, tmp, free_pages, lru) {
+		list_del(&page->lru);
+		if (!pagevec_add(&freed_pvec, page)) {
+			__pagevec_free(&freed_pvec);
+			pagevec_reinit(&freed_pvec);
+		}
+	}
+
+	pagevec_free(&freed_pvec);
+}
+
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -625,13 +648,12 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 					enum pageout_io sync_writeback)
 {
 	LIST_HEAD(ret_pages);
-	struct pagevec freed_pvec;
+	LIST_HEAD(free_pages);
 	int pgactivate = 0;
 	unsigned long nr_reclaimed = 0;
 
 	cond_resched();
 
-	pagevec_init(&freed_pvec, 1);
 	while (!list_empty(page_list)) {
 		enum page_references references;
 		struct address_space *mapping;
@@ -806,10 +828,12 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		__clear_page_locked(page);
 free_it:
 		nr_reclaimed++;
-		if (!pagevec_add(&freed_pvec, page)) {
-			__pagevec_free(&freed_pvec);
-			pagevec_reinit(&freed_pvec);
-		}
+
+		/*
+		 * Is there need to periodically free_page_list? It would
+		 * appear not as the counts should be low
+		 */
+		list_add(&page->lru, &free_pages);
 		continue;
 
 cull_mlocked:
@@ -832,9 +856,10 @@ keep:
 		list_add(&page->lru, &ret_pages);
 		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
 	}
+
+	free_page_list(&free_pages);
+
 	list_splice(&ret_pages, page_list);
-	if (pagevec_count(&freed_pvec))
-		__pagevec_free(&freed_pvec);
 	count_vm_events(PGACTIVATE, pgactivate);
 	return nr_reclaimed;
 }
@@ -916,6 +941,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 		unsigned long *scanned, int order, int mode, int file)
 {
 	unsigned long nr_taken = 0;
+	unsigned long nr_lumpy_taken = 0;
+	unsigned long nr_lumpy_dirty = 0;
+	unsigned long nr_lumpy_failed = 0;
 	unsigned long scan;
 
 	for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
@@ -993,12 +1021,25 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 				list_move(&cursor_page->lru, dst);
 				mem_cgroup_del_lru(cursor_page);
 				nr_taken++;
+				nr_lumpy_taken++;
+				if (PageDirty(cursor_page))
+					nr_lumpy_dirty++;
 				scan++;
+			} else {
+				if (mode == ISOLATE_BOTH &&
+						page_count(cursor_page))
+					nr_lumpy_failed++;
 			}
 		}
 	}
 
 	*scanned = scan;
+
+	trace_mm_vmscan_lru_isolate(order,
+			nr_to_scan, scan,
+			nr_taken,
+			nr_lumpy_taken, nr_lumpy_dirty, nr_lumpy_failed,
+			mode);
 	return nr_taken;
 }
 
@@ -1035,7 +1076,8 @@ static unsigned long clear_active_flags(struct list_head *page_list,
 			ClearPageActive(page);
 			nr_active++;
 		}
-		count[lru]++;
+		if (count)
+			count[lru]++;
 	}
 
 	return nr_active;
@@ -1112,174 +1154,212 @@ static int too_many_isolated(struct zone *zone, int file,
 }
 
 /*
- * shrink_inactive_list() is a helper for shrink_zone(). It returns the number
- * of reclaimed pages
+ * TODO: Try merging with migrations version of putback_lru_pages
  */
-static unsigned long shrink_inactive_list(unsigned long max_scan,
-			struct zone *zone, struct scan_control *sc,
-			int priority, int file)
+static noinline_for_stack void
+putback_lru_pages(struct zone *zone, struct scan_control *sc,
+				unsigned long nr_anon, unsigned long nr_file,
+				struct list_head *page_list)
 {
-	LIST_HEAD(page_list);
+	struct page *page;
 	struct pagevec pvec;
-	unsigned long nr_scanned = 0;
-	unsigned long nr_reclaimed = 0;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
-	while (unlikely(too_many_isolated(zone, file, sc))) {
-		congestion_wait(BLK_RW_ASYNC, HZ/10);
+	pagevec_init(&pvec, 1);
 
-		/* We are about to die and free our memory. Return now. */
-		if (fatal_signal_pending(current))
-			return SWAP_CLUSTER_MAX;
+	/*
+	 * Put back any unfreeable pages.
+	 */
+	spin_lock(&zone->lru_lock);
+	while (!list_empty(page_list)) {
+		int lru;
+		page = lru_to_page(page_list);
+		VM_BUG_ON(PageLRU(page));
+		list_del(&page->lru);
+		if (unlikely(!page_evictable(page, NULL))) {
+			spin_unlock_irq(&zone->lru_lock);
+			putback_lru_page(page);
+			spin_lock_irq(&zone->lru_lock);
+			continue;
+		}
+		SetPageLRU(page);
+		lru = page_lru(page);
+		add_page_to_lru_list(zone, page, lru);
+		if (is_active_lru(lru)) {
+			int file = is_file_lru(lru);
+			reclaim_stat->recent_rotated[file]++;
+		}
+		if (!pagevec_add(&pvec, page)) {
+			spin_unlock_irq(&zone->lru_lock);
+			__pagevec_release(&pvec);
+			spin_lock_irq(&zone->lru_lock);
+		}
 	}
+	__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
+	__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+
+	spin_unlock_irq(&zone->lru_lock);
+	pagevec_release(&pvec);
+}
 
+static noinline_for_stack void update_isolated_counts(struct zone *zone,
+					struct scan_control *sc,
+					unsigned long *nr_anon,
+					unsigned long *nr_file,
+					struct list_head *isolated_list)
+{
+	unsigned long nr_active;
+	unsigned int count[NR_LRU_LISTS] = { 0, };
+	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
-	pagevec_init(&pvec, 1);
+	nr_active = clear_active_flags(isolated_list, count);
+	__count_vm_events(PGDEACTIVATE, nr_active);
 
-	lru_add_drain();
-	spin_lock_irq(&zone->lru_lock);
-	do {
-		struct page *page;
-		unsigned long nr_taken;
-		unsigned long nr_scan;
-		unsigned long nr_freed;
-		unsigned long nr_active;
-		unsigned int count[NR_LRU_LISTS] = { 0, };
-		int mode = sc->lumpy_reclaim_mode ? ISOLATE_BOTH : ISOLATE_INACTIVE;
-		unsigned long nr_anon;
-		unsigned long nr_file;
+	__mod_zone_page_state(zone, NR_ACTIVE_FILE,
+			      -count[LRU_ACTIVE_FILE]);
+	__mod_zone_page_state(zone, NR_INACTIVE_FILE,
+			      -count[LRU_INACTIVE_FILE]);
+	__mod_zone_page_state(zone, NR_ACTIVE_ANON,
+			      -count[LRU_ACTIVE_ANON]);
+	__mod_zone_page_state(zone, NR_INACTIVE_ANON,
+			      -count[LRU_INACTIVE_ANON]);
 
-		if (scanning_global_lru(sc)) {
-			nr_taken = isolate_pages_global(SWAP_CLUSTER_MAX,
-							&page_list, &nr_scan,
-							sc->order, mode,
-							zone, 0, file);
-			zone->pages_scanned += nr_scan;
-			if (current_is_kswapd())
-				__count_zone_vm_events(PGSCAN_KSWAPD, zone,
-						       nr_scan);
-			else
-				__count_zone_vm_events(PGSCAN_DIRECT, zone,
-						       nr_scan);
-		} else {
-			nr_taken = mem_cgroup_isolate_pages(SWAP_CLUSTER_MAX,
-							&page_list, &nr_scan,
-							sc->order, mode,
-							zone, sc->mem_cgroup,
-							0, file);
-			/*
-			 * mem_cgroup_isolate_pages() keeps track of
-			 * scanned pages on its own.
-			 */
-		}
+	*nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
+	*nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
+	__mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
+	__mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);
 
-		if (nr_taken == 0)
-			goto done;
+	reclaim_stat->recent_scanned[0] += *nr_anon;
+	reclaim_stat->recent_scanned[1] += *nr_file;
+}
 
-		nr_active = clear_active_flags(&page_list, count);
-		__count_vm_events(PGDEACTIVATE, nr_active);
+/*
+ * Returns true if the caller should wait to clean dirty/writeback pages.
+ *
+ * If we are direct reclaiming for contiguous pages and we do not reclaim
+ * everything in the list, try again and wait for writeback IO to complete.
+ * This will stall high-order allocations noticeably. Only do that when really
+ * need to free the pages under high memory pressure.
+ */
+static inline bool should_reclaim_stall(unsigned long nr_taken,
+					unsigned long nr_freed,
+					int priority,
+					struct scan_control *sc)
+{
+	int lumpy_stall_priority;
 
-		__mod_zone_page_state(zone, NR_ACTIVE_FILE,
-						-count[LRU_ACTIVE_FILE]);
-		__mod_zone_page_state(zone, NR_INACTIVE_FILE,
-						-count[LRU_INACTIVE_FILE]);
-		__mod_zone_page_state(zone, NR_ACTIVE_ANON,
-						-count[LRU_ACTIVE_ANON]);
-		__mod_zone_page_state(zone, NR_INACTIVE_ANON,
-						-count[LRU_INACTIVE_ANON]);
+	/* kswapd should not stall on sync IO */
+	if (current_is_kswapd())
+		return false;
 
-		nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
-		nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
-		__mod_zone_page_state(zone, NR_ISOLATED_ANON, nr_anon);
-		__mod_zone_page_state(zone, NR_ISOLATED_FILE, nr_file);
+	/* Only stall on lumpy reclaim */
+	if (!sc->lumpy_reclaim_mode)
+		return false;
 
-		reclaim_stat->recent_scanned[0] += nr_anon;
-		reclaim_stat->recent_scanned[1] += nr_file;
+	/* If we have relaimed everything on the isolated list, no stall */
+	if (nr_freed == nr_taken)
+		return false;
 
-		spin_unlock_irq(&zone->lru_lock);
+	/*
+	 * For high-order allocations, there are two stall thresholds.
+	 * High-cost allocations stall immediately where as lower
+	 * order allocations such as stacks require the scanning
+	 * priority to be much higher before stalling.
+	 */
+	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+		lumpy_stall_priority = DEF_PRIORITY;
+	else
+		lumpy_stall_priority = DEF_PRIORITY / 3;
 
-		nr_scanned += nr_scan;
-		nr_freed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
+	return priority <= lumpy_stall_priority;
+}
 
+/*
+ * shrink_inactive_list() is a helper for shrink_zone(). It returns the number
+ * of reclaimed pages
+ */
+static noinline_for_stack unsigned long
+shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
+			struct scan_control *sc, int priority, int file)
+{
+	LIST_HEAD(page_list);
+	unsigned long nr_scanned;
+	unsigned long nr_reclaimed = 0;
+	unsigned long nr_taken;
+	unsigned long nr_active;
+	unsigned long nr_anon;
+	unsigned long nr_file;
+
+	while (unlikely(too_many_isolated(zone, file, sc))) {
+		congestion_wait(BLK_RW_ASYNC, HZ/10);
+
+		/* We are about to die and free our memory. Return now. */
+		if (fatal_signal_pending(current))
+			return SWAP_CLUSTER_MAX;
+	}
+
+
+	lru_add_drain();
+	spin_lock_irq(&zone->lru_lock);
+
+	if (scanning_global_lru(sc)) {
+		nr_taken = isolate_pages_global(nr_to_scan,
+			&page_list, &nr_scanned, sc->order,
+			sc->lumpy_reclaim_mode ?
+				ISOLATE_BOTH : ISOLATE_INACTIVE,
+			zone, 0, file);
+		zone->pages_scanned += nr_scanned;
+		if (current_is_kswapd())
+			__count_zone_vm_events(PGSCAN_KSWAPD, zone,
+					       nr_scanned);
+		else
+			__count_zone_vm_events(PGSCAN_DIRECT, zone,
+					       nr_scanned);
+	} else {
+		nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
+			&page_list, &nr_scanned, sc->order,
+			sc->lumpy_reclaim_mode ?
+				ISOLATE_BOTH : ISOLATE_INACTIVE,
+			zone, sc->mem_cgroup,
+			0, file);
 		/*
-		 * If we are direct reclaiming for contiguous pages and we do
-		 * not reclaim everything in the list, try again and wait
-		 * for IO to complete. This will stall high-order allocations
-		 * but that should be acceptable to the caller
+		 * mem_cgroup_isolate_pages() keeps track of
+		 * scanned pages on its own.
 		 */
-		if (nr_freed < nr_taken && !current_is_kswapd() &&
-		    sc->lumpy_reclaim_mode) {
-			congestion_wait(BLK_RW_ASYNC, HZ/10);
+	}
 
-			/*
-			 * The attempt at page out may have made some
-			 * of the pages active, mark them inactive again.
-			 */
-			nr_active = clear_active_flags(&page_list, count);
-			count_vm_events(PGDEACTIVATE, nr_active);
+	if (nr_taken == 0) {
+		spin_unlock_irq(&zone->lru_lock);
+		return 0;
+	}
 
-			nr_freed += shrink_page_list(&page_list, sc,
-							PAGEOUT_IO_SYNC);
-		}
+	update_isolated_counts(zone, sc, &nr_anon, &nr_file, &page_list);
 
-		nr_reclaimed += nr_freed;
+	spin_unlock_irq(&zone->lru_lock);
 
-		local_irq_disable();
-		if (current_is_kswapd())
-			__count_vm_events(KSWAPD_STEAL, nr_freed);
-		__count_zone_vm_events(PGSTEAL, zone, nr_freed);
+	nr_reclaimed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
+
+	/* Check if we should syncronously wait for writeback */
+	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
+		congestion_wait(BLK_RW_ASYNC, HZ/10);
 
-		spin_lock(&zone->lru_lock);
 		/*
-		 * Put back any unfreeable pages.
+		 * The attempt at page out may have made some
+		 * of the pages active, mark them inactive again.
 		 */
-		while (!list_empty(&page_list)) {
-			int lru;
-			page = lru_to_page(&page_list);
-			VM_BUG_ON(PageLRU(page));
-			list_del(&page->lru);
-			if (unlikely(!page_evictable(page, NULL))) {
-				spin_unlock_irq(&zone->lru_lock);
-				putback_lru_page(page);
-				spin_lock_irq(&zone->lru_lock);
-				continue;
-			}
-			SetPageLRU(page);
-			lru = page_lru(page);
-			add_page_to_lru_list(zone, page, lru);
-			if (is_active_lru(lru)) {
-				int file = is_file_lru(lru);
-				reclaim_stat->recent_rotated[file]++;
-			}
-			if (!pagevec_add(&pvec, page)) {
-				spin_unlock_irq(&zone->lru_lock);
-				__pagevec_release(&pvec);
-				spin_lock_irq(&zone->lru_lock);
-			}
-		}
-		__mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
-		__mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+		nr_active = clear_active_flags(&page_list, NULL);
+		count_vm_events(PGDEACTIVATE, nr_active);
 
-	} while (nr_scanned < max_scan);
+		nr_reclaimed += shrink_page_list(&page_list, sc, PAGEOUT_IO_SYNC);
+	}
 
-done:
-	spin_unlock_irq(&zone->lru_lock);
-	pagevec_release(&pvec);
-	return nr_reclaimed;
-}
+	local_irq_disable();
+	if (current_is_kswapd())
+		__count_vm_events(KSWAPD_STEAL, nr_reclaimed);
+	__count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
 
-/*
- * We are about to scan this zone at a certain priority level. If that priority
- * level is smaller (ie: more urgent) than the previous priority, then note
- * that priority level within the zone. This is done so that when the next
- * process comes in to scan this zone, it will immediately start out at this
- * priority level rather than having to build up its own scanning priority.
- * Here, this priority affects only the reclaim-mapped threshold.
- */
-static inline void note_zone_scanning_priority(struct zone *zone, int priority)
-{
-	if (priority < zone->prev_priority)
-		zone->prev_priority = priority;
+	putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
+	return nr_reclaimed;
 }
 
 /*
@@ -1583,6 +1663,13 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 	}
 
 	/*
+	 * With swappiness at 100, anonymous and file have the same priority.
+	 * This scanning priority is essentially the inverse of IO cost.
+	 */
+	anon_prio = sc->swappiness;
+	file_prio = 200 - sc->swappiness;
+
+	/*
 	 * OK, so we have swap space and a fair amount of page cache
 	 * pages. We use the recently rotated / recently scanned
 	 * ratios to determine how valuable each cache is.
@@ -1593,28 +1680,18 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 	 *
 	 * anon in [0], file in [1]
 	 */
+	spin_lock_irq(&zone->lru_lock);
 	if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
-		spin_lock_irq(&zone->lru_lock);
 		reclaim_stat->recent_scanned[0] /= 2;
 		reclaim_stat->recent_rotated[0] /= 2;
-		spin_unlock_irq(&zone->lru_lock);
 	}
 
 	if (unlikely(reclaim_stat->recent_scanned[1] > file / 4)) {
-		spin_lock_irq(&zone->lru_lock);
 		reclaim_stat->recent_scanned[1] /= 2;
 		reclaim_stat->recent_rotated[1] /= 2;
-		spin_unlock_irq(&zone->lru_lock);
 	}
 
 	/*
-	 * With swappiness at 100, anonymous and file have the same priority.
-	 * This scanning priority is essentially the inverse of IO cost.
-	 */
-	anon_prio = sc->swappiness;
-	file_prio = 200 - sc->swappiness;
-
-	/*
 	 * The amount of pressure on anon vs file pages is inversely
 	 * proportional to the fraction of recently scanned pages on
 	 * each list that were recently referenced and in active use.
@@ -1624,6 +1701,7 @@ static void get_scan_count(struct zone *zone, struct scan_control *sc,
 
 	fp = (file_prio + 1) * (reclaim_stat->recent_scanned[1] + 1);
 	fp /= reclaim_stat->recent_rotated[1] + 1;
+	spin_unlock_irq(&zone->lru_lock);
 
 	fraction[0] = ap;
 	fraction[1] = fp;
@@ -1729,13 +1807,12 @@ static void shrink_zone(int priority, struct zone *zone,
 static bool shrink_zones(int priority, struct zonelist *zonelist,
 					struct scan_control *sc)
 {
-	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
 	struct zoneref *z;
 	struct zone *zone;
 	bool all_unreclaimable = true;
 
-	for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
-					sc->nodemask) {
+	for_each_zone_zonelist_nodemask(zone, z, zonelist,
+					gfp_zone(sc->gfp_mask), sc->nodemask) {
 		if (!populated_zone(zone))
 			continue;
 		/*
@@ -1745,17 +1822,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
 		if (scanning_global_lru(sc)) {
 			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 				continue;
-			note_zone_scanning_priority(zone, priority);
-
 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 				continue;	/* Let kswapd poll it */
-		} else {
-			/*
-			 * Ignore cpuset limitation here. We just want to reduce
-			 * # of used pages by us regardless of memory shortage.
-			 */
-			mem_cgroup_note_reclaim_priority(sc->mem_cgroup,
-							priority);
 		}
 
 		shrink_zone(priority, zone, sc);
@@ -1787,10 +1855,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 	bool all_unreclaimable;
 	unsigned long total_scanned = 0;
 	struct reclaim_state *reclaim_state = current->reclaim_state;
-	unsigned long lru_pages = 0;
 	struct zoneref *z;
 	struct zone *zone;
-	enum zone_type high_zoneidx = gfp_zone(sc->gfp_mask);
 	unsigned long writeback_threshold;
 
 	get_mems_allowed();
@@ -1798,18 +1864,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 	if (scanning_global_lru(sc))
 		count_vm_event(ALLOCSTALL);
-	/*
-	 * mem_cgroup will not do shrink_slab.
-	 */
-	if (scanning_global_lru(sc)) {
-		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-
-			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-				continue;
-
-			lru_pages += zone_reclaimable_pages(zone);
-		}
-	}
 
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		sc->nr_scanned = 0;
@@ -1821,6 +1875,15 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		 * over limit cgroups
 		 */
 		if (scanning_global_lru(sc)) {
+			unsigned long lru_pages = 0;
+			for_each_zone_zonelist(zone, z, zonelist,
+					gfp_zone(sc->gfp_mask)) {
+				if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
+					continue;
+
+				lru_pages += zone_reclaimable_pages(zone);
+			}
+
 			shrink_slab(sc->nr_scanned, sc->gfp_mask, lru_pages);
 			if (reclaim_state) {
 				sc->nr_reclaimed += reclaim_state->reclaimed_slab;
@@ -1861,17 +1924,6 @@ out:
 	if (priority < 0)
 		priority = 0;
 
-	if (scanning_global_lru(sc)) {
-		for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
-
-			if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-				continue;
-
-			zone->prev_priority = priority;
-		}
-	} else
-		mem_cgroup_record_reclaim_priority(sc->mem_cgroup, priority);
-
 	delayacct_freepages_end();
 	put_mems_allowed();
 
@@ -1888,6 +1940,7 @@ out:
 unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 				gfp_t gfp_mask, nodemask_t *nodemask)
 {
+	unsigned long nr_reclaimed;
 	struct scan_control sc = {
 		.gfp_mask = gfp_mask,
 		.may_writepage = !laptop_mode,
@@ -1900,7 +1953,15 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.nodemask = nodemask,
 	};
 
-	return do_try_to_free_pages(zonelist, &sc);
+	trace_mm_vmscan_direct_reclaim_begin(order,
+				sc.may_writepage,
+				gfp_mask);
+
+	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+
+	trace_mm_vmscan_direct_reclaim_end(nr_reclaimed);
+
+	return nr_reclaimed;
 }
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
@@ -1908,9 +1969,10 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 						gfp_t gfp_mask, bool noswap,
 						unsigned int swappiness,
-						struct zone *zone, int nid)
+						struct zone *zone)
 {
 	struct scan_control sc = {
+		.nr_to_reclaim = SWAP_CLUSTER_MAX,
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
 		.may_swap = !noswap,
@@ -1918,13 +1980,13 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 		.order = 0,
 		.mem_cgroup = mem,
 	};
-	nodemask_t nm = nodemask_of_node(nid);
-
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
-	sc.nodemask = &nm;
-	sc.nr_reclaimed = 0;
-	sc.nr_scanned = 0;
+
+	trace_mm_vmscan_memcg_softlimit_reclaim_begin(0,
+						      sc.may_writepage,
+						      sc.gfp_mask);
+
 	/*
 	 * NOTE: Although we can get the priority field, using it
 	 * here is not a good idea, since it limits the pages we can scan.
@@ -1933,6 +1995,9 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
 	 * the priority and make it zero.
 	 */
 	shrink_zone(0, zone, &sc);
+
+	trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
+
 	return sc.nr_reclaimed;
 }
 
@@ -1942,6 +2007,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 					   unsigned int swappiness)
 {
 	struct zonelist *zonelist;
+	unsigned long nr_reclaimed;
 	struct scan_control sc = {
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
@@ -1956,7 +2022,16 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 	sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
 			(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 	zonelist = NODE_DATA(numa_node_id())->node_zonelists;
-	return do_try_to_free_pages(zonelist, &sc);
+
+	trace_mm_vmscan_memcg_reclaim_begin(0,
+					    sc.may_writepage,
+					    sc.gfp_mask);
+
+	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+
+	trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
+
+	return nr_reclaimed;
 }
 #endif
 
@@ -2028,22 +2103,12 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
 		.order = order,
 		.mem_cgroup = NULL,
 	};
-	/*
-	 * temp_priority is used to remember the scanning priority at which
-	 * this zone was successfully refilled to
-	 * free_pages == high_wmark_pages(zone).
-	 */
-	int temp_priority[MAX_NR_ZONES];
-
 loop_again:
 	total_scanned = 0;
 	sc.nr_reclaimed = 0;
 	sc.may_writepage = !laptop_mode;
 	count_vm_event(PAGEOUTRUN);
 
-	for (i = 0; i < pgdat->nr_zones; i++)
-		temp_priority[i] = DEF_PRIORITY;
-
 	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
 		int end_zone = 0;	/* Inclusive.  0 = ZONE_DMA */
 		unsigned long lru_pages = 0;
@@ -2103,7 +2168,6 @@ loop_again:
 		for (i = 0; i <= end_zone; i++) {
 			struct zone *zone = pgdat->node_zones + i;
 			int nr_slab;
-			int nid, zid;
 
 			if (!populated_zone(zone))
 				continue;
@@ -2111,18 +2175,14 @@ loop_again:
 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
 				continue;
 
-			temp_priority[i] = priority;
 			sc.nr_scanned = 0;
-			note_zone_scanning_priority(zone, priority);
 
-			nid = pgdat->node_id;
-			zid = zone_idx(zone);
 			/*
 			 * Call soft limit reclaim before calling shrink_zone.
 			 * For now we ignore the return value
 			 */
-			mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask,
-							nid, zid);
+			mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
+
 			/*
 			 * We put equal pressure on every zone, unless one
 			 * zone has way too many pages free already.
@@ -2186,16 +2246,6 @@ loop_again:
 			break;
 	}
 out:
-	/*
-	 * Note within each zone the priority level at which this zone was
-	 * brought into a happy state. So that the next thread which scans this
-	 * zone will start out at that priority level.
-	 */
-	for (i = 0; i < pgdat->nr_zones; i++) {
-		struct zone *zone = pgdat->node_zones + i;
-
-		zone->prev_priority = temp_priority[i];
-	}
 	if (!all_zones_ok) {
 		cond_resched();
 
@@ -2299,9 +2349,10 @@ static int kswapd(void *p)
 			 * premature sleep. If not, then go fully
 			 * to sleep until explicitly woken up
 			 */
-			if (!sleeping_prematurely(pgdat, order, remaining))
+			if (!sleeping_prematurely(pgdat, order, remaining)) {
+				trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
 				schedule();
-			else {
+			} else {
 				if (remaining)
 					count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
 				else
@@ -2321,8 +2372,10 @@ static int kswapd(void *p)
 		 * We can speed up thawing tasks if we don't call balance_pgdat
 		 * after returning from the refrigerator
 		 */
-		if (!ret)
+		if (!ret) {
+			trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
 			balance_pgdat(pgdat, order);
+		}
 	}
 	return 0;
 }
@@ -2342,6 +2395,7 @@ void wakeup_kswapd(struct zone *zone, int order)
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
+	trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
 	if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 		return;
 	if (!waitqueue_active(&pgdat->kswapd_wait))
@@ -2590,9 +2644,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		.swappiness = vm_swappiness,
 		.order = order,
 	};
-	unsigned long slab_reclaimable;
+	unsigned long nr_slab_pages0, nr_slab_pages1;
 
-	disable_swap_token();
 	cond_resched();
 	/*
 	 * We need to be able to allocate from the reserves for RECLAIM_SWAP
@@ -2611,14 +2664,13 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		 */
 		priority = ZONE_RECLAIM_PRIORITY;
 		do {
-			note_zone_scanning_priority(zone, priority);
 			shrink_zone(priority, zone, &sc);
 			priority--;
 		} while (priority >= 0 && sc.nr_reclaimed < nr_pages);
 	}
 
-	slab_reclaimable = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
-	if (slab_reclaimable > zone->min_slab_pages) {
+	nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
+	if (nr_slab_pages0 > zone->min_slab_pages) {
 		/*
 		 * shrink_slab() does not currently allow us to determine how
 		 * many pages were freed in this zone. So we take the current
@@ -2629,17 +2681,27 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 		 * Note that shrink_slab will free memory on all zones and may
 		 * take a long time.
 		 */
-		while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
-			zone_page_state(zone, NR_SLAB_RECLAIMABLE) >
-				slab_reclaimable - nr_pages)
-			;
+		for (;;) {
+			unsigned long lru_pages = zone_reclaimable_pages(zone);
+
+			/* No reclaimable slab or very low memory pressure */
+			if (!shrink_slab(sc.nr_scanned, gfp_mask, lru_pages))
+				break;
+
+			/* Freed enough memory */
+			nr_slab_pages1 = zone_page_state(zone,
+							NR_SLAB_RECLAIMABLE);
+			if (nr_slab_pages1 + nr_pages <= nr_slab_pages0)
+				break;
+		}
 
 		/*
 		 * Update nr_reclaimed by the number of slab pages we
 		 * reclaimed from this zone.
 		 */
-		sc.nr_reclaimed += slab_reclaimable -
-			zone_page_state(zone, NR_SLAB_RECLAIMABLE);
+		nr_slab_pages1 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
+		if (nr_slab_pages1 < nr_slab_pages0)
+			sc.nr_reclaimed += nr_slab_pages0 - nr_slab_pages1;
 	}
 
 	p->reclaim_state = NULL;