Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 585 |
1 file changed, 384 insertions, 201 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fa6a85378ee4..99b3ac7771ad 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -546,7 +546,6 @@ int remove_mapping(struct address_space *mapping, struct page *page) | |||
546 | void putback_lru_page(struct page *page) | 546 | void putback_lru_page(struct page *page) |
547 | { | 547 | { |
548 | int lru; | 548 | int lru; |
549 | int active = !!TestClearPageActive(page); | ||
550 | int was_unevictable = PageUnevictable(page); | 549 | int was_unevictable = PageUnevictable(page); |
551 | 550 | ||
552 | VM_BUG_ON(PageLRU(page)); | 551 | VM_BUG_ON(PageLRU(page)); |
@@ -561,8 +560,8 @@ redo: | |||
561 | * unevictable page on [in]active list. | 560 | * unevictable page on [in]active list. |
562 | * We know how to handle that. | 561 | * We know how to handle that. |
563 | */ | 562 | */ |
564 | lru = active + page_lru_base_type(page); | 563 | lru = page_lru_base_type(page); |
565 | lru_cache_add_lru(page, lru); | 564 | lru_cache_add(page); |
566 | } else { | 565 | } else { |
567 | /* | 566 | /* |
568 | * Put unevictable pages directly on zone's unevictable | 567 | * Put unevictable pages directly on zone's unevictable |
@@ -669,6 +668,35 @@ static enum page_references page_check_references(struct page *page, | |||
669 | return PAGEREF_RECLAIM; | 668 | return PAGEREF_RECLAIM; |
670 | } | 669 | } |
671 | 670 | ||
671 | /* Check if a page is dirty or under writeback */ | ||
672 | static void page_check_dirty_writeback(struct page *page, | ||
673 | bool *dirty, bool *writeback) | ||
674 | { | ||
675 | struct address_space *mapping; | ||
676 | |||
677 | /* | ||
678 | * Anonymous pages are not handled by flushers and must be written | ||
679 | * from reclaim context. Do not stall reclaim based on them | ||
680 | */ | ||
681 | if (!page_is_file_cache(page)) { | ||
682 | *dirty = false; | ||
683 | *writeback = false; | ||
684 | return; | ||
685 | } | ||
686 | |||
687 | /* By default assume that the page flags are accurate */ | ||
688 | *dirty = PageDirty(page); | ||
689 | *writeback = PageWriteback(page); | ||
690 | |||
691 | /* Verify dirty/writeback state if the filesystem supports it */ | ||
692 | if (!page_has_private(page)) | ||
693 | return; | ||
694 | |||
695 | mapping = page_mapping(page); | ||
696 | if (mapping && mapping->a_ops->is_dirty_writeback) | ||
697 | mapping->a_ops->is_dirty_writeback(page, dirty, writeback); | ||
698 | } | ||
699 | |||
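For context on how a filesystem would feed this hook: page_check_dirty_writeback() only consults the new ->is_dirty_writeback address_space operation when the page has private data. A buffer_head-backed filesystem could report the state roughly as below. This is an illustrative sketch written for this review, not code from the patch; the helper name and the "locked buffer means under IO" mapping are assumptions.

    #include <linux/buffer_head.h>
    #include <linux/mm.h>

    /*
     * Illustrative ->is_dirty_writeback callback: treat the page as
     * dirty or under writeback if any of its buffers is.
     */
    static void example_is_dirty_writeback(struct page *page,
                                           bool *dirty, bool *writeback)
    {
            struct buffer_head *head, *bh;

            *dirty = false;
            *writeback = false;

            if (!page_has_buffers(page))
                    return;

            bh = head = page_buffers(page);
            do {
                    if (buffer_locked(bh))
                            *writeback = true;      /* buffer under IO */
                    if (buffer_dirty(bh))
                            *dirty = true;
                    bh = bh->b_this_page;
            } while (bh != head);
    }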
672 | /* | 700 | /* |
673 | * shrink_page_list() returns the number of reclaimed pages | 701 | * shrink_page_list() returns the number of reclaimed pages |
674 | */ | 702 | */ |
@@ -677,16 +705,21 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
677 | struct scan_control *sc, | 705 | struct scan_control *sc, |
678 | enum ttu_flags ttu_flags, | 706 | enum ttu_flags ttu_flags, |
679 | unsigned long *ret_nr_dirty, | 707 | unsigned long *ret_nr_dirty, |
708 | unsigned long *ret_nr_unqueued_dirty, | ||
709 | unsigned long *ret_nr_congested, | ||
680 | unsigned long *ret_nr_writeback, | 710 | unsigned long *ret_nr_writeback, |
711 | unsigned long *ret_nr_immediate, | ||
681 | bool force_reclaim) | 712 | bool force_reclaim) |
682 | { | 713 | { |
683 | LIST_HEAD(ret_pages); | 714 | LIST_HEAD(ret_pages); |
684 | LIST_HEAD(free_pages); | 715 | LIST_HEAD(free_pages); |
685 | int pgactivate = 0; | 716 | int pgactivate = 0; |
717 | unsigned long nr_unqueued_dirty = 0; | ||
686 | unsigned long nr_dirty = 0; | 718 | unsigned long nr_dirty = 0; |
687 | unsigned long nr_congested = 0; | 719 | unsigned long nr_congested = 0; |
688 | unsigned long nr_reclaimed = 0; | 720 | unsigned long nr_reclaimed = 0; |
689 | unsigned long nr_writeback = 0; | 721 | unsigned long nr_writeback = 0; |
722 | unsigned long nr_immediate = 0; | ||
690 | 723 | ||
691 | cond_resched(); | 724 | cond_resched(); |
692 | 725 | ||
@@ -696,6 +729,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
696 | struct page *page; | 729 | struct page *page; |
697 | int may_enter_fs; | 730 | int may_enter_fs; |
698 | enum page_references references = PAGEREF_RECLAIM_CLEAN; | 731 | enum page_references references = PAGEREF_RECLAIM_CLEAN; |
732 | bool dirty, writeback; | ||
699 | 733 | ||
700 | cond_resched(); | 734 | cond_resched(); |
701 | 735 | ||
@@ -723,25 +757,77 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
723 | may_enter_fs = (sc->gfp_mask & __GFP_FS) || | 757 | may_enter_fs = (sc->gfp_mask & __GFP_FS) || |
724 | (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); | 758 | (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); |
725 | 759 | ||
760 | /* | ||
761 | * The number of dirty pages determines if a zone is marked | ||
762 | * reclaim_congested which affects wait_iff_congested. kswapd | ||
763 | * will stall and start writing pages if the tail of the LRU | ||
764 | * is all dirty unqueued pages. | ||
765 | */ | ||
766 | page_check_dirty_writeback(page, &dirty, &writeback); | ||
767 | if (dirty || writeback) | ||
768 | nr_dirty++; | ||
769 | |||
770 | if (dirty && !writeback) | ||
771 | nr_unqueued_dirty++; | ||
772 | |||
773 | /* | ||
774 | * Treat this page as congested if the underlying BDI is or if | ||
775 | * pages are cycling through the LRU so quickly that the | ||
776 | * pages marked for immediate reclaim are making it to the | ||
777 | * end of the LRU a second time. | ||
778 | */ | ||
779 | mapping = page_mapping(page); | ||
780 | if ((mapping && bdi_write_congested(mapping->backing_dev_info)) || | ||
781 | (writeback && PageReclaim(page))) | ||
782 | nr_congested++; | ||
783 | |||
784 | /* | ||
785 | * If a page at the tail of the LRU is under writeback, there | ||
786 | * are three cases to consider. | ||
787 | * | ||
788 | * 1) If reclaim is encountering an excessive number of pages | ||
789 | * under writeback and this page is both under writeback and | ||
790 | * PageReclaim then it indicates that pages are being queued | ||
791 | * for IO but are being recycled through the LRU before the | ||
792 | * IO can complete. Waiting on the page itself risks an | ||
793 | * indefinite stall if it is impossible to writeback the | ||
794 | * page due to IO error or disconnected storage so instead | ||
795 | * note that the LRU is being scanned too quickly and the | ||
796 | * caller can stall after page list has been processed. | ||
797 | * | ||
798 | * 2) Global reclaim encounters a page, or memcg encounters a | ||
799 | * page that is not marked for immediate reclaim or | ||
800 | * the caller does not have __GFP_IO. In this case mark | ||
801 | * the page for immediate reclaim and continue scanning. | ||
802 | * | ||
803 | * __GFP_IO is checked because a loop driver thread might | ||
804 | * enter reclaim, and deadlock if it waits on a page for | ||
805 | * which it is needed to do the write (loop masks off | ||
806 | * __GFP_IO|__GFP_FS for this reason); but more thought | ||
807 | * would probably show more reasons. | ||
808 | * | ||
809 | * Don't require __GFP_FS, since we're not going into the | ||
810 | * FS, just waiting on its writeback completion. Worryingly, | ||
811 | * ext4 gfs2 and xfs allocate pages with | ||
812 | * grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so testing | ||
813 | * may_enter_fs here is liable to OOM on them. | ||
814 | * | ||
815 | * 3) memcg encounters a page that is not already marked | ||
816 | * PageReclaim. memcg does not have any dirty pages | ||
817 | * throttling so we could easily OOM just because too many | ||
818 | * pages are in writeback and there is nothing else to | ||
819 | * reclaim. Wait for the writeback to complete. | ||
820 | */ | ||
726 | if (PageWriteback(page)) { | 821 | if (PageWriteback(page)) { |
727 | /* | 822 | /* Case 1 above */ |
728 | * memcg doesn't have any dirty pages throttling so we | 823 | if (current_is_kswapd() && |
729 | * could easily OOM just because too many pages are in | 824 | PageReclaim(page) && |
730 | * writeback and there is nothing else to reclaim. | 825 | zone_is_reclaim_writeback(zone)) { |
731 | * | 826 | nr_immediate++; |
732 | * Check __GFP_IO, certainly because a loop driver | 827 | goto keep_locked; |
733 | * thread might enter reclaim, and deadlock if it waits | 828 | |
734 | * on a page for which it is needed to do the write | 829 | /* Case 2 above */ |
735 | * (loop masks off __GFP_IO|__GFP_FS for this reason); | 830 | } else if (global_reclaim(sc) || |
736 | * but more thought would probably show more reasons. | ||
737 | * | ||
738 | * Don't require __GFP_FS, since we're not going into | ||
739 | * the FS, just waiting on its writeback completion. | ||
740 | * Worryingly, ext4 gfs2 and xfs allocate pages with | ||
741 | * grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so | ||
742 | * testing may_enter_fs here is liable to OOM on them. | ||
743 | */ | ||
744 | if (global_reclaim(sc) || | ||
745 | !PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) { | 831 | !PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) { |
746 | /* | 832 | /* |
747 | * This is slightly racy - end_page_writeback() | 833 | * This is slightly racy - end_page_writeback() |
@@ -756,9 +842,13 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
756 | */ | 842 | */ |
757 | SetPageReclaim(page); | 843 | SetPageReclaim(page); |
758 | nr_writeback++; | 844 | nr_writeback++; |
845 | |||
759 | goto keep_locked; | 846 | goto keep_locked; |
847 | |||
848 | /* Case 3 above */ | ||
849 | } else { | ||
850 | wait_on_page_writeback(page); | ||
760 | } | 851 | } |
761 | wait_on_page_writeback(page); | ||
762 | } | 852 | } |
763 | 853 | ||
764 | if (!force_reclaim) | 854 | if (!force_reclaim) |
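The side-by-side rendering interleaves the removed and added lines heavily in this hunk, so the new PageWriteback() handling is easier to follow reassembled. The excerpt below is reconstructed from the hunk above with the long case comments condensed; it is not separately compilable.

    if (PageWriteback(page)) {
            /* Case 1: kswapd sees a PageReclaim page cycle back while
             * the zone is flagged ZONE_WRITEBACK, so note it and let
             * the caller stall after the page list is processed.
             */
            if (current_is_kswapd() &&
                PageReclaim(page) &&
                zone_is_reclaim_writeback(zone)) {
                    nr_immediate++;
                    goto keep_locked;

            /* Case 2: global reclaim, or memcg reclaim of a page that
             * is not yet PageReclaim or without __GFP_IO, so mark it
             * for immediate reclaim and keep scanning.
             */
            } else if (global_reclaim(sc) ||
                       !PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) {
                    SetPageReclaim(page);
                    nr_writeback++;
                    goto keep_locked;

            /* Case 3: memcg has no dirty throttling, wait for the IO. */
            } else {
                    wait_on_page_writeback(page);
            }
    }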
@@ -784,9 +874,10 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
784 | if (!add_to_swap(page, page_list)) | 874 | if (!add_to_swap(page, page_list)) |
785 | goto activate_locked; | 875 | goto activate_locked; |
786 | may_enter_fs = 1; | 876 | may_enter_fs = 1; |
787 | } | ||
788 | 877 | ||
789 | mapping = page_mapping(page); | 878 | /* Adding to swap updated mapping */ |
879 | mapping = page_mapping(page); | ||
880 | } | ||
790 | 881 | ||
791 | /* | 882 | /* |
792 | * The page is mapped into the page tables of one or more | 883 | * The page is mapped into the page tables of one or more |
@@ -806,16 +897,14 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
806 | } | 897 | } |
807 | 898 | ||
808 | if (PageDirty(page)) { | 899 | if (PageDirty(page)) { |
809 | nr_dirty++; | ||
810 | |||
811 | /* | 900 | /* |
812 | * Only kswapd can writeback filesystem pages to | 901 | * Only kswapd can writeback filesystem pages to |
813 | * avoid risk of stack overflow but do not writeback | 902 | * avoid risk of stack overflow but only writeback |
814 | * unless under significant pressure. | 903 | * if many dirty pages have been encountered. |
815 | */ | 904 | */ |
816 | if (page_is_file_cache(page) && | 905 | if (page_is_file_cache(page) && |
817 | (!current_is_kswapd() || | 906 | (!current_is_kswapd() || |
818 | sc->priority >= DEF_PRIORITY - 2)) { | 907 | !zone_is_reclaim_dirty(zone))) { |
819 | /* | 908 | /* |
820 | * Immediately reclaim when written back. | 909 | * Immediately reclaim when written back. |
821 | * Similar in principal to deactivate_page() | 910 | * Similar in principal to deactivate_page() |
@@ -838,7 +927,6 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
838 | /* Page is dirty, try to write it out here */ | 927 | /* Page is dirty, try to write it out here */ |
839 | switch (pageout(page, mapping, sc)) { | 928 | switch (pageout(page, mapping, sc)) { |
840 | case PAGE_KEEP: | 929 | case PAGE_KEEP: |
841 | nr_congested++; | ||
842 | goto keep_locked; | 930 | goto keep_locked; |
843 | case PAGE_ACTIVATE: | 931 | case PAGE_ACTIVATE: |
844 | goto activate_locked; | 932 | goto activate_locked; |
@@ -946,22 +1034,16 @@ keep: | |||
946 | VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); | 1034 | VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); |
947 | } | 1035 | } |
948 | 1036 | ||
949 | /* | ||
950 | * Tag a zone as congested if all the dirty pages encountered were | ||
951 | * backed by a congested BDI. In this case, reclaimers should just | ||
952 | * back off and wait for congestion to clear because further reclaim | ||
953 | * will encounter the same problem | ||
954 | */ | ||
955 | if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc)) | ||
956 | zone_set_flag(zone, ZONE_CONGESTED); | ||
957 | |||
958 | free_hot_cold_page_list(&free_pages, 1); | 1037 | free_hot_cold_page_list(&free_pages, 1); |
959 | 1038 | ||
960 | list_splice(&ret_pages, page_list); | 1039 | list_splice(&ret_pages, page_list); |
961 | count_vm_events(PGACTIVATE, pgactivate); | 1040 | count_vm_events(PGACTIVATE, pgactivate); |
962 | mem_cgroup_uncharge_end(); | 1041 | mem_cgroup_uncharge_end(); |
963 | *ret_nr_dirty += nr_dirty; | 1042 | *ret_nr_dirty += nr_dirty; |
1043 | *ret_nr_congested += nr_congested; | ||
1044 | *ret_nr_unqueued_dirty += nr_unqueued_dirty; | ||
964 | *ret_nr_writeback += nr_writeback; | 1045 | *ret_nr_writeback += nr_writeback; |
1046 | *ret_nr_immediate += nr_immediate; | ||
965 | return nr_reclaimed; | 1047 | return nr_reclaimed; |
966 | } | 1048 | } |
967 | 1049 | ||
@@ -973,7 +1055,7 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, | |||
973 | .priority = DEF_PRIORITY, | 1055 | .priority = DEF_PRIORITY, |
974 | .may_unmap = 1, | 1056 | .may_unmap = 1, |
975 | }; | 1057 | }; |
976 | unsigned long ret, dummy1, dummy2; | 1058 | unsigned long ret, dummy1, dummy2, dummy3, dummy4, dummy5; |
977 | struct page *page, *next; | 1059 | struct page *page, *next; |
978 | LIST_HEAD(clean_pages); | 1060 | LIST_HEAD(clean_pages); |
979 | 1061 | ||
@@ -985,8 +1067,8 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone, | |||
985 | } | 1067 | } |
986 | 1068 | ||
987 | ret = shrink_page_list(&clean_pages, zone, &sc, | 1069 | ret = shrink_page_list(&clean_pages, zone, &sc, |
988 | TTU_UNMAP|TTU_IGNORE_ACCESS, | 1070 | TTU_UNMAP|TTU_IGNORE_ACCESS, |
989 | &dummy1, &dummy2, true); | 1071 | &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true); |
990 | list_splice(&clean_pages, page_list); | 1072 | list_splice(&clean_pages, page_list); |
991 | __mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret); | 1073 | __mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret); |
992 | return ret; | 1074 | return ret; |
@@ -1281,7 +1363,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, | |||
1281 | unsigned long nr_reclaimed = 0; | 1363 | unsigned long nr_reclaimed = 0; |
1282 | unsigned long nr_taken; | 1364 | unsigned long nr_taken; |
1283 | unsigned long nr_dirty = 0; | 1365 | unsigned long nr_dirty = 0; |
1366 | unsigned long nr_congested = 0; | ||
1367 | unsigned long nr_unqueued_dirty = 0; | ||
1284 | unsigned long nr_writeback = 0; | 1368 | unsigned long nr_writeback = 0; |
1369 | unsigned long nr_immediate = 0; | ||
1285 | isolate_mode_t isolate_mode = 0; | 1370 | isolate_mode_t isolate_mode = 0; |
1286 | int file = is_file_lru(lru); | 1371 | int file = is_file_lru(lru); |
1287 | struct zone *zone = lruvec_zone(lruvec); | 1372 | struct zone *zone = lruvec_zone(lruvec); |
@@ -1323,7 +1408,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, | |||
1323 | return 0; | 1408 | return 0; |
1324 | 1409 | ||
1325 | nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP, | 1410 | nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP, |
1326 | &nr_dirty, &nr_writeback, false); | 1411 | &nr_dirty, &nr_unqueued_dirty, &nr_congested, |
1412 | &nr_writeback, &nr_immediate, | ||
1413 | false); | ||
1327 | 1414 | ||
1328 | spin_lock_irq(&zone->lru_lock); | 1415 | spin_lock_irq(&zone->lru_lock); |
1329 | 1416 | ||
@@ -1357,7 +1444,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, | |||
1357 | * same way balance_dirty_pages() manages. | 1444 | * same way balance_dirty_pages() manages. |
1358 | * | 1445 | * |
1359 | * This scales the number of dirty pages that must be under writeback | 1446 | * This scales the number of dirty pages that must be under writeback |
1360 | * before throttling depending on priority. It is a simple backoff | 1447 | * before a zone gets flagged ZONE_WRITEBACK. It is a simple backoff |
1361 | * function that has the most effect in the range DEF_PRIORITY to | 1448 | * function that has the most effect in the range DEF_PRIORITY to |
1362 | * DEF_PRIORITY-2 which is the priority reclaim is considered to be | 1449 | * DEF_PRIORITY-2 which is the priority reclaim is considered to be |
1363 | * in trouble. | 1450 | * in trouble. |
@@ -1368,9 +1455,53 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, | |||
1368 | * ... | 1455 | * ... |
1369 | * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any | 1456 | * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any |
1370 | * isolated page is PageWriteback | 1457 | * isolated page is PageWriteback |
1458 | * | ||
1459 | * Once a zone is flagged ZONE_WRITEBACK, kswapd will count the number | ||
1460 | * of pages under pages flagged for immediate reclaim and stall if any | ||
1461 | * are encountered in the nr_immediate check below. | ||
1371 | */ | 1462 | */ |
1372 | if (nr_writeback && nr_writeback >= | 1463 | if (nr_writeback && nr_writeback >= |
1373 | (nr_taken >> (DEF_PRIORITY - sc->priority))) | 1464 | (nr_taken >> (DEF_PRIORITY - sc->priority))) |
1465 | zone_set_flag(zone, ZONE_WRITEBACK); | ||
1466 | |||
1467 | /* | ||
1468 | * memcg will stall in page writeback so only consider forcibly | ||
1469 | * stalling for global reclaim | ||
1470 | */ | ||
1471 | if (global_reclaim(sc)) { | ||
1472 | /* | ||
1473 | * Tag a zone as congested if all the dirty pages scanned were | ||
1474 | * backed by a congested BDI and wait_iff_congested will stall. | ||
1475 | */ | ||
1476 | if (nr_dirty && nr_dirty == nr_congested) | ||
1477 | zone_set_flag(zone, ZONE_CONGESTED); | ||
1478 | |||
1479 | /* | ||
1480 | * If dirty pages are scanned that are not queued for IO, it | ||
1481 | * implies that flushers are not keeping up. In this case, flag | ||
1482 | * the zone ZONE_TAIL_LRU_DIRTY and kswapd will start writing | ||
1483 | * pages from reclaim context. It will forcibly stall in the | ||
1484 | * next check. | ||
1485 | */ | ||
1486 | if (nr_unqueued_dirty == nr_taken) | ||
1487 | zone_set_flag(zone, ZONE_TAIL_LRU_DIRTY); | ||
1488 | |||
1489 | /* | ||
1490 | * In addition, if kswapd scans pages marked for | ||
1491 | * immediate reclaim and under writeback (nr_immediate), it | ||
1492 | * implies that pages are cycling through the LRU faster than | ||
1493 | * they are written so also forcibly stall. | ||
1494 | */ | ||
1495 | if (nr_unqueued_dirty == nr_taken || nr_immediate) | ||
1496 | congestion_wait(BLK_RW_ASYNC, HZ/10); | ||
1497 | } | ||
1498 | |||
1499 | /* | ||
1500 | * Stall direct reclaim for IO completions if underlying BDIs or zone | ||
1501 | * is congested. Allow kswapd to continue until it starts encountering | ||
1502 | * unqueued dirty pages or cycling through the LRU too quickly. | ||
1503 | */ | ||
1504 | if (!sc->hibernation_mode && !current_is_kswapd()) | ||
1374 | wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); | 1505 | wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); |
1375 | 1506 | ||
1376 | trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, | 1507 | trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id, |
@@ -1822,17 +1953,25 @@ out: | |||
1822 | static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) | 1953 | static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) |
1823 | { | 1954 | { |
1824 | unsigned long nr[NR_LRU_LISTS]; | 1955 | unsigned long nr[NR_LRU_LISTS]; |
1956 | unsigned long targets[NR_LRU_LISTS]; | ||
1825 | unsigned long nr_to_scan; | 1957 | unsigned long nr_to_scan; |
1826 | enum lru_list lru; | 1958 | enum lru_list lru; |
1827 | unsigned long nr_reclaimed = 0; | 1959 | unsigned long nr_reclaimed = 0; |
1828 | unsigned long nr_to_reclaim = sc->nr_to_reclaim; | 1960 | unsigned long nr_to_reclaim = sc->nr_to_reclaim; |
1829 | struct blk_plug plug; | 1961 | struct blk_plug plug; |
1962 | bool scan_adjusted = false; | ||
1830 | 1963 | ||
1831 | get_scan_count(lruvec, sc, nr); | 1964 | get_scan_count(lruvec, sc, nr); |
1832 | 1965 | ||
1966 | /* Record the original scan target for proportional adjustments later */ | ||
1967 | memcpy(targets, nr, sizeof(nr)); | ||
1968 | |||
1833 | blk_start_plug(&plug); | 1969 | blk_start_plug(&plug); |
1834 | while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || | 1970 | while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || |
1835 | nr[LRU_INACTIVE_FILE]) { | 1971 | nr[LRU_INACTIVE_FILE]) { |
1972 | unsigned long nr_anon, nr_file, percentage; | ||
1973 | unsigned long nr_scanned; | ||
1974 | |||
1836 | for_each_evictable_lru(lru) { | 1975 | for_each_evictable_lru(lru) { |
1837 | if (nr[lru]) { | 1976 | if (nr[lru]) { |
1838 | nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); | 1977 | nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); |
@@ -1842,17 +1981,60 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) | |||
1842 | lruvec, sc); | 1981 | lruvec, sc); |
1843 | } | 1982 | } |
1844 | } | 1983 | } |
1984 | |||
1985 | if (nr_reclaimed < nr_to_reclaim || scan_adjusted) | ||
1986 | continue; | ||
1987 | |||
1845 | /* | 1988 | /* |
1846 | * On large memory systems, scan >> priority can become | 1989 | * For global direct reclaim, reclaim only the number of pages |
1847 | * really large. This is fine for the starting priority; | 1990 | * requested. Less care is taken to scan proportionally as it |
1848 | * we want to put equal scanning pressure on each zone. | 1991 | * is more important to minimise direct reclaim stall latency |
1849 | * However, if the VM has a harder time of freeing pages, | 1992 | * than it is to properly age the LRU lists. |
1850 | * with multiple processes reclaiming pages, the total | ||
1851 | * freeing target can get unreasonably large. | ||
1852 | */ | 1993 | */ |
1853 | if (nr_reclaimed >= nr_to_reclaim && | 1994 | if (global_reclaim(sc) && !current_is_kswapd()) |
1854 | sc->priority < DEF_PRIORITY) | ||
1855 | break; | 1995 | break; |
1996 | |||
1997 | /* | ||
1998 | * For kswapd and memcg, reclaim at least the number of pages | ||
1999 | * requested. Ensure that the anon and file LRUs shrink | ||
2000 | * proportionally what was requested by get_scan_count(). We | ||
2001 | * stop reclaiming one LRU and reduce the amount scanning | ||
2002 | * proportional to the original scan target. | ||
2003 | */ | ||
2004 | nr_file = nr[LRU_INACTIVE_FILE] + nr[LRU_ACTIVE_FILE]; | ||
2005 | nr_anon = nr[LRU_INACTIVE_ANON] + nr[LRU_ACTIVE_ANON]; | ||
2006 | |||
2007 | if (nr_file > nr_anon) { | ||
2008 | unsigned long scan_target = targets[LRU_INACTIVE_ANON] + | ||
2009 | targets[LRU_ACTIVE_ANON] + 1; | ||
2010 | lru = LRU_BASE; | ||
2011 | percentage = nr_anon * 100 / scan_target; | ||
2012 | } else { | ||
2013 | unsigned long scan_target = targets[LRU_INACTIVE_FILE] + | ||
2014 | targets[LRU_ACTIVE_FILE] + 1; | ||
2015 | lru = LRU_FILE; | ||
2016 | percentage = nr_file * 100 / scan_target; | ||
2017 | } | ||
2018 | |||
2019 | /* Stop scanning the smaller of the LRU */ | ||
2020 | nr[lru] = 0; | ||
2021 | nr[lru + LRU_ACTIVE] = 0; | ||
2022 | |||
2023 | /* | ||
2024 | * Recalculate the other LRU scan count based on its original | ||
2025 | * scan target and the percentage scanning already complete | ||
2026 | */ | ||
2027 | lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE; | ||
2028 | nr_scanned = targets[lru] - nr[lru]; | ||
2029 | nr[lru] = targets[lru] * (100 - percentage) / 100; | ||
2030 | nr[lru] -= min(nr[lru], nr_scanned); | ||
2031 | |||
2032 | lru += LRU_ACTIVE; | ||
2033 | nr_scanned = targets[lru] - nr[lru]; | ||
2034 | nr[lru] = targets[lru] * (100 - percentage) / 100; | ||
2035 | nr[lru] -= min(nr[lru], nr_scanned); | ||
2036 | |||
2037 | scan_adjusted = true; | ||
1856 | } | 2038 | } |
1857 | blk_finish_plug(&plug); | 2039 | blk_finish_plug(&plug); |
1858 | sc->nr_reclaimed += nr_reclaimed; | 2040 | sc->nr_reclaimed += nr_reclaimed; |
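The percentage arithmetic in the new proportional-scan block is easier to check with numbers. The standalone sketch below (plain userspace C, sample targets invented for illustration, active and inactive lists lumped together for brevity) mirrors the recalculation performed above.

    #include <stdio.h>

    int main(void)
    {
            /* Original scan targets from get_scan_count(): file larger */
            unsigned long targets_anon = 60, targets_file = 400;
            /* Remaining counts when nr_to_reclaim was first met */
            unsigned long nr_anon = 28, nr_file = 368;

            /* Anon is the smaller LRU: scanning of it stops and the
             * outstanding fraction of its target is recorded.
             */
            unsigned long percentage = nr_anon * 100 / (targets_anon + 1);

            /* Scale the file target so both LRUs finish at roughly the
             * same fraction of their original targets.
             */
            unsigned long scanned = targets_file - nr_file;
            unsigned long nr = targets_file * (100 - percentage) / 100;
            nr -= (nr < scanned) ? nr : scanned;

            /* percentage is 45, so the file LRU is trimmed to
             * 400 * 55 / 100 = 220 pages, minus the 32 already
             * scanned, leaving 188 still to scan.
             */
            printf("remaining file scan target: %lu\n", nr);
            return 0;
    }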
@@ -2222,17 +2404,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
2222 | WB_REASON_TRY_TO_FREE_PAGES); | 2404 | WB_REASON_TRY_TO_FREE_PAGES); |
2223 | sc->may_writepage = 1; | 2405 | sc->may_writepage = 1; |
2224 | } | 2406 | } |
2225 | |||
2226 | /* Take a nap, wait for some writeback to complete */ | ||
2227 | if (!sc->hibernation_mode && sc->nr_scanned && | ||
2228 | sc->priority < DEF_PRIORITY - 2) { | ||
2229 | struct zone *preferred_zone; | ||
2230 | |||
2231 | first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask), | ||
2232 | &cpuset_current_mems_allowed, | ||
2233 | &preferred_zone); | ||
2234 | wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10); | ||
2235 | } | ||
2236 | } while (--sc->priority >= 0); | 2407 | } while (--sc->priority >= 0); |
2237 | 2408 | ||
2238 | out: | 2409 | out: |
@@ -2601,6 +2772,91 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, | |||
2601 | } | 2772 | } |
2602 | 2773 | ||
2603 | /* | 2774 | /* |
2775 | * kswapd shrinks the zone by the number of pages required to reach | ||
2776 | * the high watermark. | ||
2777 | * | ||
2778 | * Returns true if kswapd scanned at least the requested number of pages to | ||
2779 | * reclaim or if the lack of progress was due to pages under writeback. | ||
2780 | * This is used to determine if the scanning priority needs to be raised. | ||
2781 | */ | ||
2782 | static bool kswapd_shrink_zone(struct zone *zone, | ||
2783 | int classzone_idx, | ||
2784 | struct scan_control *sc, | ||
2785 | unsigned long lru_pages, | ||
2786 | unsigned long *nr_attempted) | ||
2787 | { | ||
2788 | unsigned long nr_slab; | ||
2789 | int testorder = sc->order; | ||
2790 | unsigned long balance_gap; | ||
2791 | struct reclaim_state *reclaim_state = current->reclaim_state; | ||
2792 | struct shrink_control shrink = { | ||
2793 | .gfp_mask = sc->gfp_mask, | ||
2794 | }; | ||
2795 | bool lowmem_pressure; | ||
2796 | |||
2797 | /* Reclaim above the high watermark. */ | ||
2798 | sc->nr_to_reclaim = max(SWAP_CLUSTER_MAX, high_wmark_pages(zone)); | ||
2799 | |||
2800 | /* | ||
2801 | * Kswapd reclaims only single pages with compaction enabled. Trying | ||
2802 | * too hard to reclaim until contiguous free pages have become | ||
2803 | * available can hurt performance by evicting too much useful data | ||
2804 | * from memory. Do not reclaim more than needed for compaction. | ||
2805 | */ | ||
2806 | if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && | ||
2807 | compaction_suitable(zone, sc->order) != | ||
2808 | COMPACT_SKIPPED) | ||
2809 | testorder = 0; | ||
2810 | |||
2811 | /* | ||
2812 | * We put equal pressure on every zone, unless one zone has way too | ||
2813 | * many pages free already. The "too many pages" is defined as the | ||
2814 | * high wmark plus a "gap" where the gap is either the low | ||
2815 | * watermark or 1% of the zone, whichever is smaller. | ||
2816 | */ | ||
2817 | balance_gap = min(low_wmark_pages(zone), | ||
2818 | (zone->managed_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) / | ||
2819 | KSWAPD_ZONE_BALANCE_GAP_RATIO); | ||
2820 | |||
2821 | /* | ||
2822 | * If there is no low memory pressure or the zone is balanced then no | ||
2823 | * reclaim is necessary | ||
2824 | */ | ||
2825 | lowmem_pressure = (buffer_heads_over_limit && is_highmem(zone)); | ||
2826 | if (!lowmem_pressure && zone_balanced(zone, testorder, | ||
2827 | balance_gap, classzone_idx)) | ||
2828 | return true; | ||
2829 | |||
2830 | shrink_zone(zone, sc); | ||
2831 | |||
2832 | reclaim_state->reclaimed_slab = 0; | ||
2833 | nr_slab = shrink_slab(&shrink, sc->nr_scanned, lru_pages); | ||
2834 | sc->nr_reclaimed += reclaim_state->reclaimed_slab; | ||
2835 | |||
2836 | /* Account for the number of pages attempted to reclaim */ | ||
2837 | *nr_attempted += sc->nr_to_reclaim; | ||
2838 | |||
2839 | if (nr_slab == 0 && !zone_reclaimable(zone)) | ||
2840 | zone->all_unreclaimable = 1; | ||
2841 | |||
2842 | zone_clear_flag(zone, ZONE_WRITEBACK); | ||
2843 | |||
2844 | /* | ||
2845 | * If a zone reaches its high watermark, consider it to be no longer | ||
2846 | * congested. It's possible there are dirty pages backed by congested | ||
2847 | * BDIs but as pressure is relieved, speculatively avoid congestion | ||
2848 | * waits. | ||
2849 | */ | ||
2850 | if (!zone->all_unreclaimable && | ||
2851 | zone_balanced(zone, testorder, 0, classzone_idx)) { | ||
2852 | zone_clear_flag(zone, ZONE_CONGESTED); | ||
2853 | zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY); | ||
2854 | } | ||
2855 | |||
2856 | return sc->nr_scanned >= sc->nr_to_reclaim; | ||
2857 | } | ||
2858 | |||
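A quick worked check of the balance_gap carried over into kswapd_shrink_zone(), matching the "low watermark or 1% of the zone" comment since KSWAPD_ZONE_BALANCE_GAP_RATIO is 100. The zone numbers are invented for illustration:

    managed_pages = 2,000,000 pages  (about 7.6GiB with 4K pages)
    low_wmark     = 10,000 pages
    1% term       = (2,000,000 + 99) / 100 = 20,000 pages
    balance_gap   = min(10,000, 20,000)    = 10,000 pages

With that gap, kswapd_shrink_zone() skips reclaim only when the zone already has at least high_wmark + 10,000 pages free at the test order (and there is no buffer_heads_over_limit pressure on a highmem zone).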
2859 | /* | ||
2604 | * For kswapd, balance_pgdat() will work across all this node's zones until | 2860 | * For kswapd, balance_pgdat() will work across all this node's zones until |
2605 | * they are all at high_wmark_pages(zone). | 2861 | * they are all at high_wmark_pages(zone). |
2606 | * | 2862 | * |
@@ -2624,35 +2880,28 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining, | |||
2624 | static unsigned long balance_pgdat(pg_data_t *pgdat, int order, | 2880 | static unsigned long balance_pgdat(pg_data_t *pgdat, int order, |
2625 | int *classzone_idx) | 2881 | int *classzone_idx) |
2626 | { | 2882 | { |
2627 | bool pgdat_is_balanced = false; | ||
2628 | int i; | 2883 | int i; |
2629 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ | 2884 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ |
2630 | struct reclaim_state *reclaim_state = current->reclaim_state; | ||
2631 | unsigned long nr_soft_reclaimed; | 2885 | unsigned long nr_soft_reclaimed; |
2632 | unsigned long nr_soft_scanned; | 2886 | unsigned long nr_soft_scanned; |
2633 | struct scan_control sc = { | 2887 | struct scan_control sc = { |
2634 | .gfp_mask = GFP_KERNEL, | 2888 | .gfp_mask = GFP_KERNEL, |
2889 | .priority = DEF_PRIORITY, | ||
2635 | .may_unmap = 1, | 2890 | .may_unmap = 1, |
2636 | .may_swap = 1, | 2891 | .may_swap = 1, |
2637 | /* | 2892 | .may_writepage = !laptop_mode, |
2638 | * kswapd doesn't want to be bailed out while reclaim. because | ||
2639 | * we want to put equal scanning pressure on each zone. | ||
2640 | */ | ||
2641 | .nr_to_reclaim = ULONG_MAX, | ||
2642 | .order = order, | 2893 | .order = order, |
2643 | .target_mem_cgroup = NULL, | 2894 | .target_mem_cgroup = NULL, |
2644 | }; | 2895 | }; |
2645 | struct shrink_control shrink = { | ||
2646 | .gfp_mask = sc.gfp_mask, | ||
2647 | }; | ||
2648 | loop_again: | ||
2649 | sc.priority = DEF_PRIORITY; | ||
2650 | sc.nr_reclaimed = 0; | ||
2651 | sc.may_writepage = !laptop_mode; | ||
2652 | count_vm_event(PAGEOUTRUN); | 2896 | count_vm_event(PAGEOUTRUN); |
2653 | 2897 | ||
2654 | do { | 2898 | do { |
2655 | unsigned long lru_pages = 0; | 2899 | unsigned long lru_pages = 0; |
2900 | unsigned long nr_attempted = 0; | ||
2901 | bool raise_priority = true; | ||
2902 | bool pgdat_needs_compaction = (order > 0); | ||
2903 | |||
2904 | sc.nr_reclaimed = 0; | ||
2656 | 2905 | ||
2657 | /* | 2906 | /* |
2658 | * Scan in the highmem->dma direction for the highest | 2907 | * Scan in the highmem->dma direction for the highest |
@@ -2689,23 +2938,46 @@ loop_again: | |||
2689 | end_zone = i; | 2938 | end_zone = i; |
2690 | break; | 2939 | break; |
2691 | } else { | 2940 | } else { |
2692 | /* If balanced, clear the congested flag */ | 2941 | /* |
2942 | * If balanced, clear the dirty and congested | ||
2943 | * flags | ||
2944 | */ | ||
2693 | zone_clear_flag(zone, ZONE_CONGESTED); | 2945 | zone_clear_flag(zone, ZONE_CONGESTED); |
2946 | zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY); | ||
2694 | } | 2947 | } |
2695 | } | 2948 | } |
2696 | 2949 | ||
2697 | if (i < 0) { | 2950 | if (i < 0) |
2698 | pgdat_is_balanced = true; | ||
2699 | goto out; | 2951 | goto out; |
2700 | } | ||
2701 | 2952 | ||
2702 | for (i = 0; i <= end_zone; i++) { | 2953 | for (i = 0; i <= end_zone; i++) { |
2703 | struct zone *zone = pgdat->node_zones + i; | 2954 | struct zone *zone = pgdat->node_zones + i; |
2704 | 2955 | ||
2956 | if (!populated_zone(zone)) | ||
2957 | continue; | ||
2958 | |||
2705 | lru_pages += zone_reclaimable_pages(zone); | 2959 | lru_pages += zone_reclaimable_pages(zone); |
2960 | |||
2961 | /* | ||
2962 | * If any zone is currently balanced then kswapd will | ||
2963 | * not call compaction as it is expected that the | ||
2964 | * necessary pages are already available. | ||
2965 | */ | ||
2966 | if (pgdat_needs_compaction && | ||
2967 | zone_watermark_ok(zone, order, | ||
2968 | low_wmark_pages(zone), | ||
2969 | *classzone_idx, 0)) | ||
2970 | pgdat_needs_compaction = false; | ||
2706 | } | 2971 | } |
2707 | 2972 | ||
2708 | /* | 2973 | /* |
2974 | * If we're getting trouble reclaiming, start doing writepage | ||
2975 | * even in laptop mode. | ||
2976 | */ | ||
2977 | if (sc.priority < DEF_PRIORITY - 2) | ||
2978 | sc.may_writepage = 1; | ||
2979 | |||
2980 | /* | ||
2709 | * Now scan the zone in the dma->highmem direction, stopping | 2981 | * Now scan the zone in the dma->highmem direction, stopping |
2710 | * at the last zone which needs scanning. | 2982 | * at the last zone which needs scanning. |
2711 | * | 2983 | * |
@@ -2716,8 +2988,6 @@ loop_again: | |||
2716 | */ | 2988 | */ |
2717 | for (i = 0; i <= end_zone; i++) { | 2989 | for (i = 0; i <= end_zone; i++) { |
2718 | struct zone *zone = pgdat->node_zones + i; | 2990 | struct zone *zone = pgdat->node_zones + i; |
2719 | int nr_slab, testorder; | ||
2720 | unsigned long balance_gap; | ||
2721 | 2991 | ||
2722 | if (!populated_zone(zone)) | 2992 | if (!populated_zone(zone)) |
2723 | continue; | 2993 | continue; |
@@ -2738,65 +3008,14 @@ loop_again: | |||
2738 | sc.nr_reclaimed += nr_soft_reclaimed; | 3008 | sc.nr_reclaimed += nr_soft_reclaimed; |
2739 | 3009 | ||
2740 | /* | 3010 | /* |
2741 | * We put equal pressure on every zone, unless | 3011 | * There should be no need to raise the scanning |
2742 | * one zone has way too many pages free | 3012 | * priority if enough pages are already being scanned |
2743 | * already. The "too many pages" is defined | 3013 | * that that high watermark would be met at 100% |
2744 | * as the high wmark plus a "gap" where the | 3014 | * efficiency. |
2745 | * gap is either the low watermark or 1% | ||
2746 | * of the zone, whichever is smaller. | ||
2747 | */ | 3015 | */ |
2748 | balance_gap = min(low_wmark_pages(zone), | 3016 | if (kswapd_shrink_zone(zone, end_zone, &sc, |
2749 | (zone->managed_pages + | 3017 | lru_pages, &nr_attempted)) |
2750 | KSWAPD_ZONE_BALANCE_GAP_RATIO-1) / | 3018 | raise_priority = false; |
2751 | KSWAPD_ZONE_BALANCE_GAP_RATIO); | ||
2752 | /* | ||
2753 | * Kswapd reclaims only single pages with compaction | ||
2754 | * enabled. Trying too hard to reclaim until contiguous | ||
2755 | * free pages have become available can hurt performance | ||
2756 | * by evicting too much useful data from memory. | ||
2757 | * Do not reclaim more than needed for compaction. | ||
2758 | */ | ||
2759 | testorder = order; | ||
2760 | if (IS_ENABLED(CONFIG_COMPACTION) && order && | ||
2761 | compaction_suitable(zone, order) != | ||
2762 | COMPACT_SKIPPED) | ||
2763 | testorder = 0; | ||
2764 | |||
2765 | if ((buffer_heads_over_limit && is_highmem_idx(i)) || | ||
2766 | !zone_balanced(zone, testorder, | ||
2767 | balance_gap, end_zone)) { | ||
2768 | shrink_zone(zone, &sc); | ||
2769 | |||
2770 | reclaim_state->reclaimed_slab = 0; | ||
2771 | nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); | ||
2772 | sc.nr_reclaimed += reclaim_state->reclaimed_slab; | ||
2773 | |||
2774 | if (nr_slab == 0 && !zone_reclaimable(zone)) | ||
2775 | zone->all_unreclaimable = 1; | ||
2776 | } | ||
2777 | |||
2778 | /* | ||
2779 | * If we're getting trouble reclaiming, start doing | ||
2780 | * writepage even in laptop mode. | ||
2781 | */ | ||
2782 | if (sc.priority < DEF_PRIORITY - 2) | ||
2783 | sc.may_writepage = 1; | ||
2784 | |||
2785 | if (zone->all_unreclaimable) { | ||
2786 | if (end_zone && end_zone == i) | ||
2787 | end_zone--; | ||
2788 | continue; | ||
2789 | } | ||
2790 | |||
2791 | if (zone_balanced(zone, testorder, 0, end_zone)) | ||
2792 | /* | ||
2793 | * If a zone reaches its high watermark, | ||
2794 | * consider it to be no longer congested. It's | ||
2795 | * possible there are dirty pages backed by | ||
2796 | * congested BDIs but as pressure is relieved, | ||
2797 | * speculatively avoid congestion waits | ||
2798 | */ | ||
2799 | zone_clear_flag(zone, ZONE_CONGESTED); | ||
2800 | } | 3019 | } |
2801 | 3020 | ||
2802 | /* | 3021 | /* |
@@ -2808,74 +3027,38 @@ loop_again: | |||
2808 | pfmemalloc_watermark_ok(pgdat)) | 3027 | pfmemalloc_watermark_ok(pgdat)) |
2809 | wake_up(&pgdat->pfmemalloc_wait); | 3028 | wake_up(&pgdat->pfmemalloc_wait); |
2810 | 3029 | ||
2811 | if (pgdat_balanced(pgdat, order, *classzone_idx)) { | ||
2812 | pgdat_is_balanced = true; | ||
2813 | break; /* kswapd: all done */ | ||
2814 | } | ||
2815 | |||
2816 | /* | 3030 | /* |
2817 | * We do this so kswapd doesn't build up large priorities for | 3031 | * Fragmentation may mean that the system cannot be rebalanced |
2818 | * example when it is freeing in parallel with allocators. It | 3032 | * for high-order allocations in all zones. If twice the |
2819 | * matches the direct reclaim path behaviour in terms of impact | 3033 | * allocation size has been reclaimed and the zones are still |
2820 | * on zone->*_priority. | 3034 | * not balanced then recheck the watermarks at order-0 to |
3035 | * prevent kswapd reclaiming excessively. Assume that a | ||
3036 | * process that requested a high-order allocation can use direct reclaim/compaction. | ||
2821 | */ | 3037 | */ |
2822 | if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) | 3038 | if (order && sc.nr_reclaimed >= 2UL << order) |
2823 | break; | 3039 | order = sc.order = 0; |
2824 | } while (--sc.priority >= 0); | ||
2825 | |||
2826 | out: | ||
2827 | if (!pgdat_is_balanced) { | ||
2828 | cond_resched(); | ||
2829 | 3040 | ||
2830 | try_to_freeze(); | 3041 | /* Check if kswapd should be suspending */ |
3042 | if (try_to_freeze() || kthread_should_stop()) | ||
3043 | break; | ||
2831 | 3044 | ||
2832 | /* | 3045 | /* |
2833 | * Fragmentation may mean that the system cannot be | 3046 | * Compact if necessary and kswapd is reclaiming at least the |
2834 | * rebalanced for high-order allocations in all zones. | 3047 | * high watermark number of pages as requsted |
2835 | * At this point, if nr_reclaimed < SWAP_CLUSTER_MAX, | ||
2836 | * it means the zones have been fully scanned and are still | ||
2837 | * not balanced. For high-order allocations, there is | ||
2838 | * little point trying all over again as kswapd may | ||
2839 | * infinite loop. | ||
2840 | * | ||
2841 | * Instead, recheck all watermarks at order-0 as they | ||
2842 | * are the most important. If watermarks are ok, kswapd will go | ||
2843 | * back to sleep. High-order users can still perform direct | ||
2844 | * reclaim if they wish. | ||
2845 | */ | 3048 | */ |
2846 | if (sc.nr_reclaimed < SWAP_CLUSTER_MAX) | 3049 | if (pgdat_needs_compaction && sc.nr_reclaimed > nr_attempted) |
2847 | order = sc.order = 0; | ||
2848 | |||
2849 | goto loop_again; | ||
2850 | } | ||
2851 | |||
2852 | /* | ||
2853 | * If kswapd was reclaiming at a higher order, it has the option of | ||
2854 | * sleeping without all zones being balanced. Before it does, it must | ||
2855 | * ensure that the watermarks for order-0 on *all* zones are met and | ||
2856 | * that the congestion flags are cleared. The congestion flag must | ||
2857 | * be cleared as kswapd is the only mechanism that clears the flag | ||
2858 | * and it is potentially going to sleep here. | ||
2859 | */ | ||
2860 | if (order) { | ||
2861 | int zones_need_compaction = 1; | ||
2862 | |||
2863 | for (i = 0; i <= end_zone; i++) { | ||
2864 | struct zone *zone = pgdat->node_zones + i; | ||
2865 | |||
2866 | if (!populated_zone(zone)) | ||
2867 | continue; | ||
2868 | |||
2869 | /* Check if the memory needs to be defragmented. */ | ||
2870 | if (zone_watermark_ok(zone, order, | ||
2871 | low_wmark_pages(zone), *classzone_idx, 0)) | ||
2872 | zones_need_compaction = 0; | ||
2873 | } | ||
2874 | |||
2875 | if (zones_need_compaction) | ||
2876 | compact_pgdat(pgdat, order); | 3050 | compact_pgdat(pgdat, order); |
2877 | } | ||
2878 | 3051 | ||
3052 | /* | ||
3053 | * Raise priority if scanning rate is too low or there was no | ||
3054 | * progress in reclaiming pages | ||
3055 | */ | ||
3056 | if (raise_priority || !sc.nr_reclaimed) | ||
3057 | sc.priority--; | ||
3058 | } while (sc.priority >= 1 && | ||
3059 | !pgdat_balanced(pgdat, order, *classzone_idx)); | ||
3060 | |||
3061 | out: | ||
2879 | /* | 3062 | /* |
2880 | * Return the order we were reclaiming at so prepare_kswapd_sleep() | 3063 | * Return the order we were reclaiming at so prepare_kswapd_sleep() |
2881 | * makes a decision on the order we were last reclaiming at. However, | 3064 | * makes a decision on the order we were last reclaiming at. However, |
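The final hunks interleave the old and new versions of balance_pgdat() heavily, so the tail of the rewritten priority loop is shown here reassembled from the new-side lines above (excerpt only, not separately compilable, comments lightly condensed):

            /*
             * Fragmentation may mean the system cannot be rebalanced for
             * high-order allocations in all zones. If twice the allocation
             * size has been reclaimed and the zones are still not balanced,
             * recheck the watermarks at order-0 to prevent kswapd reclaiming
             * excessively.
             */
            if (order && sc.nr_reclaimed >= 2UL << order)
                    order = sc.order = 0;

            /* Check if kswapd should be suspending */
            if (try_to_freeze() || kthread_should_stop())
                    break;

            /*
             * Compact if necessary and kswapd is reclaiming at least the
             * high watermark number of pages as requested
             */
            if (pgdat_needs_compaction && sc.nr_reclaimed > nr_attempted)
                    compact_pgdat(pgdat, order);

            /*
             * Raise priority if scanning rate is too low or there was no
             * progress in reclaiming pages
             */
            if (raise_priority || !sc.nr_reclaimed)
                    sc.priority--;
    } while (sc.priority >= 1 &&
             !pgdat_balanced(pgdat, order, *classzone_idx));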