Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c | 218
1 file changed, 141 insertions, 77 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b94c9464f262..b8a6fdc21312 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -51,6 +51,12 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/vmscan.h>
 
+enum lumpy_mode {
+	LUMPY_MODE_NONE,
+	LUMPY_MODE_ASYNC,
+	LUMPY_MODE_SYNC,
+};
+
 struct scan_control {
 	/* Incremented by the number of inactive pages that were scanned */
 	unsigned long nr_scanned;
@@ -82,7 +88,7 @@ struct scan_control {
 	 * Intend to reclaim enough continuous memory rather than reclaim
 	 * enough amount of memory. i.e, mode for high order allocation.
 	 */
-	bool lumpy_reclaim_mode;
+	enum lumpy_mode lumpy_reclaim_mode;
 
 	/* Which cgroup do we reclaim from */
 	struct mem_cgroup *mem_cgroup;
@@ -265,6 +271,36 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
 	return ret;
 }
 
+static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc,
+				   bool sync)
+{
+	enum lumpy_mode mode = sync ? LUMPY_MODE_SYNC : LUMPY_MODE_ASYNC;
+
+	/*
+	 * Some reclaim have alredy been failed. No worth to try synchronous
+	 * lumpy reclaim.
+	 */
+	if (sync && sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
+		return;
+
+	/*
+	 * If we need a large contiguous chunk of memory, or have
+	 * trouble getting a small set of contiguous pages, we
+	 * will reclaim both active and inactive pages.
+	 */
+	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+		sc->lumpy_reclaim_mode = mode;
+	else if (sc->order && priority < DEF_PRIORITY - 2)
+		sc->lumpy_reclaim_mode = mode;
+	else
+		sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
+}
+
+static void disable_lumpy_reclaim_mode(struct scan_control *sc)
+{
+	sc->lumpy_reclaim_mode = LUMPY_MODE_NONE;
+}
+
 static inline int is_page_cache_freeable(struct page *page)
 {
 	/*
@@ -275,7 +311,8 @@ static inline int is_page_cache_freeable(struct page *page)
 	return page_count(page) - page_has_private(page) == 2;
 }
 
-static int may_write_to_queue(struct backing_dev_info *bdi)
+static int may_write_to_queue(struct backing_dev_info *bdi,
+			      struct scan_control *sc)
 {
 	if (current->flags & PF_SWAPWRITE)
 		return 1;
@@ -283,6 +320,10 @@ static int may_write_to_queue(struct backing_dev_info *bdi)
 		return 1;
 	if (bdi == current->backing_dev_info)
 		return 1;
+
+	/* lumpy reclaim for hugepage often need a lot of write */
+	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+		return 1;
 	return 0;
 }
 
@@ -307,12 +348,6 @@ static void handle_write_error(struct address_space *mapping,
 	unlock_page(page);
 }
 
-/* Request for sync pageout. */
-enum pageout_io {
-	PAGEOUT_IO_ASYNC,
-	PAGEOUT_IO_SYNC,
-};
-
 /* possible outcome of pageout() */
 typedef enum {
 	/* failed to write page out, page is locked */
@@ -330,7 +365,7 @@ typedef enum {
  * Calls ->writepage().
  */
 static pageout_t pageout(struct page *page, struct address_space *mapping,
-			enum pageout_io sync_writeback)
+			struct scan_control *sc)
 {
 	/*
 	 * If the page is dirty, only perform writeback if that write
@@ -366,7 +401,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 	}
 	if (mapping->a_ops->writepage == NULL)
 		return PAGE_ACTIVATE;
-	if (!may_write_to_queue(mapping->backing_dev_info))
+	if (!may_write_to_queue(mapping->backing_dev_info, sc))
 		return PAGE_KEEP;
 
 	if (clear_page_dirty_for_io(page)) {
@@ -376,7 +411,6 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 			.nr_to_write = SWAP_CLUSTER_MAX,
 			.range_start = 0,
 			.range_end = LLONG_MAX,
-			.nonblocking = 1,
 			.for_reclaim = 1,
 		};
 
@@ -394,7 +428,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 		 * direct reclaiming a large contiguous area and the
 		 * first attempt to free a range of pages fails.
 		 */
-		if (PageWriteback(page) && sync_writeback == PAGEOUT_IO_SYNC)
+		if (PageWriteback(page) &&
+		    sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC)
 			wait_on_page_writeback(page);
 
 		if (!PageWriteback(page)) {
@@ -402,7 +437,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping,
 			ClearPageReclaim(page);
 		}
 		trace_mm_vmscan_writepage(page,
-			trace_reclaim_flags(page, sync_writeback));
+			trace_reclaim_flags(page, sc->lumpy_reclaim_mode));
 		inc_zone_page_state(page, NR_VMSCAN_WRITE);
 		return PAGE_SUCCESS;
 	}
@@ -580,7 +615,7 @@ static enum page_references page_check_references(struct page *page,
 	referenced_page = TestClearPageReferenced(page);
 
 	/* Lumpy reclaim - ignore references */
-	if (sc->lumpy_reclaim_mode)
+	if (sc->lumpy_reclaim_mode != LUMPY_MODE_NONE)
 		return PAGEREF_RECLAIM;
 
 	/*
@@ -616,7 +651,7 @@ static enum page_references page_check_references(struct page *page,
 	}
 
 	/* Reclaim if clean, defer dirty pages to writeback */
-	if (referenced_page)
+	if (referenced_page && !PageSwapBacked(page))
 		return PAGEREF_RECLAIM_CLEAN;
 
 	return PAGEREF_RECLAIM;
@@ -644,12 +679,14 @@ static noinline_for_stack void free_page_list(struct list_head *free_pages)
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
-					struct scan_control *sc,
-					enum pageout_io sync_writeback)
+				      struct zone *zone,
+				      struct scan_control *sc)
 {
 	LIST_HEAD(ret_pages);
 	LIST_HEAD(free_pages);
 	int pgactivate = 0;
+	unsigned long nr_dirty = 0;
+	unsigned long nr_congested = 0;
 	unsigned long nr_reclaimed = 0;
 
 	cond_resched();
@@ -669,6 +706,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			goto keep;
 
 		VM_BUG_ON(PageActive(page));
+		VM_BUG_ON(page_zone(page) != zone);
 
 		sc->nr_scanned++;
 
@@ -694,10 +732,13 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 			 * for any page for which writeback has already
 			 * started.
 			 */
-			if (sync_writeback == PAGEOUT_IO_SYNC && may_enter_fs)
+			if (sc->lumpy_reclaim_mode == LUMPY_MODE_SYNC &&
+			    may_enter_fs)
 				wait_on_page_writeback(page);
-			else
-				goto keep_locked;
+			else {
+				unlock_page(page);
+				goto keep_lumpy;
+			}
 		}
 
 		references = page_check_references(page, sc);
@@ -743,6 +784,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		}
 
 		if (PageDirty(page)) {
+			nr_dirty++;
+
 			if (references == PAGEREF_RECLAIM_CLEAN)
 				goto keep_locked;
 			if (!may_enter_fs)
@@ -751,14 +794,18 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 				goto keep_locked;
 
 			/* Page is dirty, try to write it out here */
-			switch (pageout(page, mapping, sync_writeback)) {
+			switch (pageout(page, mapping, sc)) {
 			case PAGE_KEEP:
+				nr_congested++;
 				goto keep_locked;
 			case PAGE_ACTIVATE:
 				goto activate_locked;
 			case PAGE_SUCCESS:
-				if (PageWriteback(page) || PageDirty(page))
+				if (PageWriteback(page))
+					goto keep_lumpy;
+				if (PageDirty(page))
 					goto keep;
+
 				/*
 				 * A synchronous write - probably a ramdisk. Go
 				 * ahead and try to reclaim the page.
@@ -841,6 +888,7 @@ cull_mlocked:
 			try_to_free_swap(page);
 		unlock_page(page);
 		putback_lru_page(page);
+		disable_lumpy_reclaim_mode(sc);
 		continue;
 
 activate_locked:
@@ -853,10 +901,21 @@ activate_locked:
 keep_locked:
 		unlock_page(page);
 keep:
+		disable_lumpy_reclaim_mode(sc);
+keep_lumpy:
 		list_add(&page->lru, &ret_pages);
 		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
 	}
 
+	/*
+	 * Tag a zone as congested if all the dirty pages encountered were
+	 * backed by a congested BDI. In this case, reclaimers should just
+	 * back off and wait for congestion to clear because further reclaim
+	 * will encounter the same problem
+	 */
+	if (nr_dirty == nr_congested)
+		zone_set_flag(zone, ZONE_CONGESTED);
+
 	free_page_list(&free_pages);
 
 	list_splice(&ret_pages, page_list);
@@ -1006,7 +1065,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 
 			/* Check that we have not crossed a zone boundary. */
 			if (unlikely(page_zone_id(cursor_page) != zone_id))
-				continue;
+				break;
 
 			/*
 			 * If we don't have enough swap space, reclaiming of
@@ -1014,8 +1073,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 			 * pointless.
 			 */
 			if (nr_swap_pages <= 0 && PageAnon(cursor_page) &&
 			    !PageSwapCache(cursor_page))
-				continue;
+				break;
 
 			if (__isolate_lru_page(cursor_page, mode, file) == 0) {
 				list_move(&cursor_page->lru, dst);
@@ -1026,11 +1085,16 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
 					nr_lumpy_dirty++;
 				scan++;
 			} else {
-				if (mode == ISOLATE_BOTH &&
-						page_count(cursor_page))
-					nr_lumpy_failed++;
+				/* the page is freed already. */
+				if (!page_count(cursor_page))
+					continue;
+				break;
 			}
 		}
+
+		/* If we break out of the loop above, lumpy reclaim failed */
+		if (pfn < end_pfn)
+			nr_lumpy_failed++;
 	}
 
 	*scanned = scan;
@@ -1253,7 +1317,7 @@ static inline bool should_reclaim_stall(unsigned long nr_taken,
 		return false;
 
 	/* Only stall on lumpy reclaim */
-	if (!sc->lumpy_reclaim_mode)
+	if (sc->lumpy_reclaim_mode == LUMPY_MODE_NONE)
 		return false;
 
 	/* If we have relaimed everything on the isolated list, no stall */
@@ -1286,7 +1350,6 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	unsigned long nr_scanned;
 	unsigned long nr_reclaimed = 0;
 	unsigned long nr_taken;
-	unsigned long nr_active;
 	unsigned long nr_anon;
 	unsigned long nr_file;
 
@@ -1298,15 +1361,15 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 		return SWAP_CLUSTER_MAX;
 	}
 
-
+	set_lumpy_reclaim_mode(priority, sc, false);
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
 
 	if (scanning_global_lru(sc)) {
 		nr_taken = isolate_pages_global(nr_to_scan,
 			&page_list, &nr_scanned, sc->order,
-			sc->lumpy_reclaim_mode ?
-				ISOLATE_BOTH : ISOLATE_INACTIVE,
+			sc->lumpy_reclaim_mode == LUMPY_MODE_NONE ?
+					ISOLATE_INACTIVE : ISOLATE_BOTH,
 			zone, 0, file);
 		zone->pages_scanned += nr_scanned;
 		if (current_is_kswapd())
@@ -1318,8 +1381,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	} else {
 		nr_taken = mem_cgroup_isolate_pages(nr_to_scan,
 			&page_list, &nr_scanned, sc->order,
-			sc->lumpy_reclaim_mode ?
-				ISOLATE_BOTH : ISOLATE_INACTIVE,
+			sc->lumpy_reclaim_mode == LUMPY_MODE_NONE ?
+					ISOLATE_INACTIVE : ISOLATE_BOTH,
 			zone, sc->mem_cgroup,
 			0, file);
 		/*
@@ -1337,20 +1400,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
 	spin_unlock_irq(&zone->lru_lock);
 
-	nr_reclaimed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
+	nr_reclaimed = shrink_page_list(&page_list, zone, sc);
 
 	/* Check if we should syncronously wait for writeback */
 	if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
-		congestion_wait(BLK_RW_ASYNC, HZ/10);
-
-		/*
-		 * The attempt at page out may have made some
-		 * of the pages active, mark them inactive again.
-		 */
-		nr_active = clear_active_flags(&page_list, NULL);
-		count_vm_events(PGDEACTIVATE, nr_active);
-
-		nr_reclaimed += shrink_page_list(&page_list, sc, PAGEOUT_IO_SYNC);
+		set_lumpy_reclaim_mode(priority, sc, true);
+		nr_reclaimed += shrink_page_list(&page_list, zone, sc);
 	}
 
 	local_irq_disable();
@@ -1359,6 +1414,12 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 	__count_zone_vm_events(PGSTEAL, zone, nr_reclaimed);
 
 	putback_lru_pages(zone, sc, nr_anon, nr_file, &page_list);
+
+	trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
+		zone_idx(zone),
+		nr_scanned, nr_reclaimed,
+		priority,
+		trace_shrink_flags(file, sc->lumpy_reclaim_mode));
 	return nr_reclaimed;
 }
 
@@ -1506,6 +1567,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 	spin_unlock_irq(&zone->lru_lock);
 }
 
+#ifdef CONFIG_SWAP
 static int inactive_anon_is_low_global(struct zone *zone)
 {
 	unsigned long active, inactive;
@@ -1531,12 +1593,26 @@ static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc)
 {
 	int low;
 
+	/*
+	 * If we don't have swap space, anonymous page deactivation
+	 * is pointless.
+	 */
+	if (!total_swap_pages)
+		return 0;
+
 	if (scanning_global_lru(sc))
 		low = inactive_anon_is_low_global(zone);
 	else
 		low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup);
 	return low;
 }
+#else
+static inline int inactive_anon_is_low(struct zone *zone,
+					struct scan_control *sc)
+{
+	return 0;
+}
+#endif
 
 static int inactive_file_is_low_global(struct zone *zone)
 {
@@ -1721,21 +1797,6 @@ out:
 	}
 }
 
-static void set_lumpy_reclaim_mode(int priority, struct scan_control *sc)
-{
-	/*
-	 * If we need a large contiguous chunk of memory, or have
-	 * trouble getting a small set of contiguous pages, we
-	 * will reclaim both active and inactive pages.
-	 */
-	if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
-		sc->lumpy_reclaim_mode = 1;
-	else if (sc->order && priority < DEF_PRIORITY - 2)
-		sc->lumpy_reclaim_mode = 1;
-	else
-		sc->lumpy_reclaim_mode = 0;
-}
-
 /*
  * This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
  */
@@ -1750,8 +1811,6 @@ static void shrink_zone(int priority, struct zone *zone,
 
 	get_scan_count(zone, sc, nr, priority);
 
-	set_lumpy_reclaim_mode(priority, sc);
-
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 					nr[LRU_INACTIVE_FILE]) {
 		for_each_evictable_lru(l) {
@@ -1782,7 +1841,7 @@ static void shrink_zone(int priority, struct zone *zone,
 	 * Even if we did not try to evict anon pages at all, we want to
 	 * rebalance the anon lru active/inactive ratio.
 	 */
-	if (inactive_anon_is_low(zone, sc) && nr_swap_pages > 0)
+	if (inactive_anon_is_low(zone, sc))
 		shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0);
 
 	throttle_vm_writeout(sc->gfp_mask);
@@ -1937,21 +1996,16 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
 		/* Take a nap, wait for some writeback to complete */
 		if (!sc->hibernation_mode && sc->nr_scanned &&
-		    priority < DEF_PRIORITY - 2)
-			congestion_wait(BLK_RW_ASYNC, HZ/10);
+		    priority < DEF_PRIORITY - 2) {
+			struct zone *preferred_zone;
+
+			first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
+						NULL, &preferred_zone);
+			wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
+		}
 	}
 
 out:
-	/*
-	 * Now that we've scanned all the zones at this priority level, note
-	 * that level within the zone so that the next thread which performs
-	 * scanning of this zone will immediately start out at this priority
-	 * level. This affects only the decision whether or not to bring
-	 * mapped pages onto the inactive list.
-	 */
-	if (priority < 0)
-		priority = 0;
-
 	delayacct_freepages_end();
 	put_mems_allowed();
 
@@ -2247,6 +2301,15 @@ loop_again:
 				if (!zone_watermark_ok(zone, order,
 					min_wmark_pages(zone), end_zone, 0))
 					has_under_min_watermark_zone = 1;
+			} else {
+				/*
+				 * If a zone reaches its high watermark,
+				 * consider it to be no longer congested. It's
+				 * possible there are dirty pages backed by
+				 * congested BDIs but as pressure is relieved,
+				 * spectulatively avoid congestion waits
+				 */
+				zone_clear_flag(zone, ZONE_CONGESTED);
 			}
 
 		}
@@ -2987,6 +3050,7 @@ int scan_unevictable_handler(struct ctl_table *table, int write,
 	return 0;
 }
 
+#ifdef CONFIG_NUMA
 /*
  * per node 'scan_unevictable_pages' attribute. On demand re-scan of
  * a specified node's per zone unevictable lists for evictable pages.
@@ -3033,4 +3097,4 @@ void scan_unevictable_unregister_node(struct node *node)
 {
 	sysdev_remove_file(&node->sysdev, &attr_scan_unevictable_pages);
 }
-
+#endif