author     Steven Whitehouse <steve@men-an-tol.chygwyn.com>  2006-02-23 04:49:43 -0500
committer  Steven Whitehouse <swhiteho@redhat.com>           2006-02-23 04:49:43 -0500
commit     d35462b4bb847b68321c55e95c926aa485aecce2 (patch)
tree       b08e18bf6e672633402871ee763102fdb5e63229 /mm/vmscan.c
parent     91ffd7db71e7451f89941a8f428b4daa2a7c1e38 (diff)
parent     9e956c2dac9bec602ed1ba29181b45ba6d2b6448 (diff)
Merge branch 'master'
Diffstat (limited to 'mm/vmscan.c')
 -rw-r--r--  mm/vmscan.c  441
 1 file changed, 373 insertions, 68 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2e34b61a70c7..1838c15ca4fd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -443,6 +443,10 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 		BUG_ON(PageActive(page));
 
 		sc->nr_scanned++;
+
+		if (!sc->may_swap && page_mapped(page))
+			goto keep_locked;
+
 		/* Double the slab pressure for mapped and swapcache pages */
 		if (page_mapped(page) || PageSwapCache(page))
 			sc->nr_scanned++;
@@ -477,7 +481,13 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 		 * processes. Try to unmap it here.
 		 */
 		if (page_mapped(page) && mapping) {
-			switch (try_to_unmap(page)) {
+			/*
+			 * No unmapping if we do not swap
+			 */
+			if (!sc->may_swap)
+				goto keep_locked;
+
+			switch (try_to_unmap(page, 0)) {
 			case SWAP_FAIL:
 				goto activate_locked;
 			case SWAP_AGAIN:
@@ -492,7 +502,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 				goto keep_locked;
 			if (!may_enter_fs)
 				goto keep_locked;
-			if (laptop_mode && !sc->may_writepage)
+			if (!sc->may_writepage)
 				goto keep_locked;
 
 			/* Page is dirty, try to write it out here */
@@ -609,6 +619,15 @@ int putback_lru_pages(struct list_head *l)
 }
 
 /*
+ * Non migratable page
+ */
+int fail_migrate_page(struct page *newpage, struct page *page)
+{
+	return -EIO;
+}
+EXPORT_SYMBOL(fail_migrate_page);
+
+/*
  * swapout a single page
  * page is locked upon entry, unlocked on exit
  */
@@ -617,7 +636,7 @@ static int swap_page(struct page *page)
 	struct address_space *mapping = page_mapping(page);
 
 	if (page_mapped(page) && mapping)
-		if (try_to_unmap(page) != SWAP_SUCCESS)
+		if (try_to_unmap(page, 1) != SWAP_SUCCESS)
 			goto unlock_retry;
 
 	if (PageDirty(page)) {
@@ -653,6 +672,167 @@ unlock_retry:
 retry:
 	return -EAGAIN;
 }
+EXPORT_SYMBOL(swap_page);
+
+/*
+ * Page migration was first developed in the context of the memory hotplug
+ * project. The main authors of the migration code are:
+ *
+ * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
+ * Hirokazu Takahashi <taka@valinux.co.jp>
+ * Dave Hansen <haveblue@us.ibm.com>
+ * Christoph Lameter <clameter@sgi.com>
+ */
+
+/*
+ * Remove references for a page and establish the new page with the correct
+ * basic settings to be able to stop accesses to the page.
+ */
+int migrate_page_remove_references(struct page *newpage,
+				struct page *page, int nr_refs)
+{
+	struct address_space *mapping = page_mapping(page);
+	struct page **radix_pointer;
+
+	/*
+	 * Avoid doing any of the following work if the page count
+	 * indicates that the page is in use or truncate has removed
+	 * the page.
+	 */
+	if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
+		return 1;
+
+	/*
+	 * Establish swap ptes for anonymous pages or destroy pte
+	 * maps for files.
+	 *
+	 * In order to reestablish file backed mappings the fault handlers
+	 * will take the radix tree_lock which may then be used to stop
+	 * processses from accessing this page until the new page is ready.
+	 *
+	 * A process accessing via a swap pte (an anonymous page) will take a
+	 * page_lock on the old page which will block the process until the
+	 * migration attempt is complete. At that time the PageSwapCache bit
+	 * will be examined. If the page was migrated then the PageSwapCache
+	 * bit will be clear and the operation to retrieve the page will be
+	 * retried which will find the new page in the radix tree. Then a new
+	 * direct mapping may be generated based on the radix tree contents.
+	 *
+	 * If the page was not migrated then the PageSwapCache bit
+	 * is still set and the operation may continue.
+	 */
+	try_to_unmap(page, 1);
+
+	/*
+	 * Give up if we were unable to remove all mappings.
+	 */
+	if (page_mapcount(page))
+		return 1;
+
+	write_lock_irq(&mapping->tree_lock);
+
+	radix_pointer = (struct page **)radix_tree_lookup_slot(
+						&mapping->page_tree,
+						page_index(page));
+
+	if (!page_mapping(page) || page_count(page) != nr_refs ||
+			*radix_pointer != page) {
+		write_unlock_irq(&mapping->tree_lock);
+		return 1;
+	}
+
+	/*
+	 * Now we know that no one else is looking at the page.
+	 *
+	 * Certain minimal information about a page must be available
+	 * in order for other subsystems to properly handle the page if they
+	 * find it through the radix tree update before we are finished
+	 * copying the page.
+	 */
+	get_page(newpage);
+	newpage->index = page->index;
+	newpage->mapping = page->mapping;
+	if (PageSwapCache(page)) {
+		SetPageSwapCache(newpage);
+		set_page_private(newpage, page_private(page));
+	}
+
+	*radix_pointer = newpage;
+	__put_page(page);
+	write_unlock_irq(&mapping->tree_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(migrate_page_remove_references);
+
+/*
+ * Copy the page to its new location
+ */
+void migrate_page_copy(struct page *newpage, struct page *page)
+{
+	copy_highpage(newpage, page);
+
+	if (PageError(page))
+		SetPageError(newpage);
+	if (PageReferenced(page))
+		SetPageReferenced(newpage);
+	if (PageUptodate(page))
+		SetPageUptodate(newpage);
+	if (PageActive(page))
+		SetPageActive(newpage);
+	if (PageChecked(page))
+		SetPageChecked(newpage);
+	if (PageMappedToDisk(page))
+		SetPageMappedToDisk(newpage);
+
+	if (PageDirty(page)) {
+		clear_page_dirty_for_io(page);
+		set_page_dirty(newpage);
+	}
+
+	ClearPageSwapCache(page);
+	ClearPageActive(page);
+	ClearPagePrivate(page);
+	set_page_private(page, 0);
+	page->mapping = NULL;
+
+	/*
+	 * If any waiters have accumulated on the new page then
+	 * wake them up.
+	 */
+	if (PageWriteback(newpage))
+		end_page_writeback(newpage);
+}
+EXPORT_SYMBOL(migrate_page_copy);
+
+/*
+ * Common logic to directly migrate a single page suitable for
+ * pages that do not use PagePrivate.
+ *
+ * Pages are locked upon entry and exit.
+ */
+int migrate_page(struct page *newpage, struct page *page)
+{
+	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
+
+	if (migrate_page_remove_references(newpage, page, 2))
+		return -EAGAIN;
+
+	migrate_page_copy(newpage, page);
+
+	/*
+	 * Remove auxiliary swap entries and replace
+	 * them with real ptes.
+	 *
+	 * Note that a real pte entry will allow processes that are not
+	 * waiting on the page lock to use the new page via the page tables
+	 * before the new page is unlocked.
+	 */
+	remove_from_swap(newpage);
+	return 0;
+}
+EXPORT_SYMBOL(migrate_page);
+
 /*
  * migrate_pages
  *
@@ -663,14 +843,9 @@ retry:
  * pages are swapped out.
  *
  * The function returns after 10 attempts or if no pages
- * are movable anymore because t has become empty
+ * are movable anymore because to has become empty
  * or no retryable pages exist anymore.
  *
- * SIMPLIFIED VERSION: This implementation of migrate_pages
- * is only swapping out pages and never touches the second
- * list. The direct migration patchset
- * extends this function to avoid the use of swap.
- *
  * Return: Number of pages not migrated when "to" ran empty.
  */
 int migrate_pages(struct list_head *from, struct list_head *to,
@@ -691,6 +866,9 @@ redo:
 	retry = 0;
 
 	list_for_each_entry_safe(page, page2, from, lru) {
+		struct page *newpage = NULL;
+		struct address_space *mapping;
+
 		cond_resched();
 
 		rc = 0;
@@ -698,6 +876,9 @@ redo:
 			/* page was freed from under us. So we are done. */
 			goto next;
 
+		if (to && list_empty(to))
+			break;
+
 		/*
 		 * Skip locked pages during the first two passes to give the
 		 * functions holding the lock time to release the page. Later we
@@ -734,12 +915,84 @@ redo:
 			}
 		}
 
+		if (!to) {
+			rc = swap_page(page);
+			goto next;
+		}
+
+		newpage = lru_to_page(to);
+		lock_page(newpage);
+
 		/*
-		 * Page is properly locked and writeback is complete.
+		 * Pages are properly locked and writeback is complete.
 		 * Try to migrate the page.
 		 */
-		rc = swap_page(page);
-		goto next;
+		mapping = page_mapping(page);
+		if (!mapping)
+			goto unlock_both;
+
+		if (mapping->a_ops->migratepage) {
+			/*
+			 * Most pages have a mapping and most filesystems
+			 * should provide a migration function. Anonymous
+			 * pages are part of swap space which also has its
+			 * own migration function. This is the most common
+			 * path for page migration.
+			 */
+			rc = mapping->a_ops->migratepage(newpage, page);
+			goto unlock_both;
+		}
+
+		/*
+		 * Default handling if a filesystem does not provide
+		 * a migration function. We can only migrate clean
+		 * pages so try to write out any dirty pages first.
+		 */
+		if (PageDirty(page)) {
+			switch (pageout(page, mapping)) {
+			case PAGE_KEEP:
+			case PAGE_ACTIVATE:
+				goto unlock_both;
+
+			case PAGE_SUCCESS:
+				unlock_page(newpage);
+				goto next;
+
+			case PAGE_CLEAN:
+				; /* try to migrate the page below */
+			}
+		}
+
+		/*
+		 * Buffers are managed in a filesystem specific way.
+		 * We must have no buffers or drop them.
+		 */
+		if (!page_has_buffers(page) ||
+				try_to_release_page(page, GFP_KERNEL)) {
+			rc = migrate_page(newpage, page);
+			goto unlock_both;
+		}
+
+		/*
+		 * On early passes with mapped pages simply
+		 * retry. There may be a lock held for some
+		 * buffers that may go away. Later
+		 * swap them out.
+		 */
+		if (pass > 4) {
+			/*
+			 * Persistently unable to drop buffers..... As a
+			 * measure of last resort we fall back to
+			 * swap_page().
+			 */
+			unlock_page(newpage);
+			newpage = NULL;
+			rc = swap_page(page);
+			goto next;
+		}
+
+unlock_both:
+		unlock_page(newpage);
 
 unlock_page:
 		unlock_page(page);
@@ -752,7 +1005,10 @@ next:
 			list_move(&page->lru, failed);
 			nr_failed++;
 		} else {
-			/* Success */
+			if (newpage) {
+				/* Successful migration. Return page to LRU */
+				move_to_lru(newpage);
+			}
 			list_move(&page->lru, moved);
 		}
 	}
@@ -939,9 +1195,47 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 	struct page *page;
 	struct pagevec pvec;
 	int reclaim_mapped = 0;
-	long mapped_ratio;
-	long distress;
-	long swap_tendency;
+
+	if (unlikely(sc->may_swap)) {
+		long mapped_ratio;
+		long distress;
+		long swap_tendency;
+
+		/*
+		 * `distress' is a measure of how much trouble we're having
+		 * reclaiming pages. 0 -> no problems. 100 -> great trouble.
+		 */
+		distress = 100 >> zone->prev_priority;
+
+		/*
+		 * The point of this algorithm is to decide when to start
+		 * reclaiming mapped memory instead of just pagecache. Work out
+		 * how much memory
+		 * is mapped.
+		 */
+		mapped_ratio = (sc->nr_mapped * 100) / total_memory;
+
+		/*
+		 * Now decide how much we really want to unmap some pages. The
+		 * mapped ratio is downgraded - just because there's a lot of
+		 * mapped memory doesn't necessarily mean that page reclaim
+		 * isn't succeeding.
+		 *
+		 * The distress ratio is important - we don't want to start
+		 * going oom.
+		 *
+		 * A 100% value of vm_swappiness overrides this algorithm
+		 * altogether.
+		 */
+		swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
+
+		/*
+		 * Now use this metric to decide whether to start moving mapped
+		 * memory onto the inactive list.
+		 */
+		if (swap_tendency >= 100)
+			reclaim_mapped = 1;
+	}
 
 	lru_add_drain();
 	spin_lock_irq(&zone->lru_lock);
@@ -951,37 +1245,6 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 	zone->nr_active -= pgmoved;
 	spin_unlock_irq(&zone->lru_lock);
 
-	/*
-	 * `distress' is a measure of how much trouble we're having reclaiming
-	 * pages. 0 -> no problems. 100 -> great trouble.
-	 */
-	distress = 100 >> zone->prev_priority;
-
-	/*
-	 * The point of this algorithm is to decide when to start reclaiming
-	 * mapped memory instead of just pagecache. Work out how much memory
-	 * is mapped.
-	 */
-	mapped_ratio = (sc->nr_mapped * 100) / total_memory;
-
-	/*
-	 * Now decide how much we really want to unmap some pages. The mapped
-	 * ratio is downgraded - just because there's a lot of mapped memory
-	 * doesn't necessarily mean that page reclaim isn't succeeding.
-	 *
-	 * The distress ratio is important - we don't want to start going oom.
-	 *
-	 * A 100% value of vm_swappiness overrides this algorithm altogether.
-	 */
-	swap_tendency = mapped_ratio / 2 + distress + vm_swappiness;
-
-	/*
-	 * Now use this metric to decide whether to start moving mapped memory
-	 * onto the inactive list.
-	 */
-	if (swap_tendency >= 100)
-		reclaim_mapped = 1;
-
 	while (!list_empty(&l_hold)) {
 		cond_resched();
 		page = lru_to_page(&l_hold);
@@ -1170,7 +1433,7 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 	int i;
 
 	sc.gfp_mask = gfp_mask;
-	sc.may_writepage = 0;
+	sc.may_writepage = !laptop_mode;
 	sc.may_swap = 1;
 
 	inc_page_state(allocstall);
@@ -1273,7 +1536,7 @@ loop_again:
 	total_scanned = 0;
 	total_reclaimed = 0;
 	sc.gfp_mask = GFP_KERNEL;
-	sc.may_writepage = 0;
+	sc.may_writepage = !laptop_mode;
 	sc.may_swap = 1;
 	sc.nr_mapped = read_page_state(nr_mapped);
 
@@ -1358,9 +1621,7 @@ scan:
 			sc.nr_reclaimed = 0;
 			sc.priority = priority;
 			sc.swap_cluster_max = nr_pages? nr_pages : SWAP_CLUSTER_MAX;
-			atomic_inc(&zone->reclaim_in_progress);
 			shrink_zone(zone, &sc);
-			atomic_dec(&zone->reclaim_in_progress);
 			reclaim_state->reclaimed_slab = 0;
 			nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
 						lru_pages);
@@ -1586,40 +1847,61 @@ module_init(kswapd_init)
  */
 int zone_reclaim_mode __read_mostly;
 
+#define RECLAIM_OFF 0
+#define RECLAIM_ZONE (1<<0)	/* Run shrink_cache on the zone */
+#define RECLAIM_WRITE (1<<1)	/* Writeout pages during reclaim */
+#define RECLAIM_SWAP (1<<2)	/* Swap pages out during reclaim */
+#define RECLAIM_SLAB (1<<3)	/* Do a global slab shrink if the zone is out of memory */
+
 /*
  * Mininum time between zone reclaim scans
  */
-#define ZONE_RECLAIM_INTERVAL HZ/2
+int zone_reclaim_interval __read_mostly = 30*HZ;
+
+/*
+ * Priority for ZONE_RECLAIM. This determines the fraction of pages
+ * of a node considered for each zone_reclaim. 4 scans 1/16th of
+ * a zone.
+ */
+#define ZONE_RECLAIM_PRIORITY 4
+
 /*
  * Try to free up some pages from this zone through reclaim.
  */
 int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 {
-	int nr_pages = 1 << order;
+	int nr_pages;
 	struct task_struct *p = current;
 	struct reclaim_state reclaim_state;
-	struct scan_control sc = {
-		.gfp_mask = gfp_mask,
-		.may_writepage = 0,
-		.may_swap = 0,
-		.nr_mapped = read_page_state(nr_mapped),
-		.nr_scanned = 0,
-		.nr_reclaimed = 0,
-		.priority = 0
-	};
+	struct scan_control sc;
+	cpumask_t mask;
+	int node_id;
+
+	if (time_before(jiffies,
+		zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval))
+			return 0;
 
 	if (!(gfp_mask & __GFP_WAIT) ||
-		zone->zone_pgdat->node_id != numa_node_id() ||
 		zone->all_unreclaimable ||
 		atomic_read(&zone->reclaim_in_progress) > 0)
 			return 0;
 
-	if (time_before(jiffies,
-		zone->last_unsuccessful_zone_reclaim + ZONE_RECLAIM_INTERVAL))
-			return 0;
+	node_id = zone->zone_pgdat->node_id;
+	mask = node_to_cpumask(node_id);
+	if (!cpus_empty(mask) && node_id != numa_node_id())
+		return 0;
+
+	sc.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE);
+	sc.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP);
+	sc.nr_scanned = 0;
+	sc.nr_reclaimed = 0;
+	sc.priority = ZONE_RECLAIM_PRIORITY + 1;
+	sc.nr_mapped = read_page_state(nr_mapped);
+	sc.gfp_mask = gfp_mask;
 
 	disable_swap_token();
 
+	nr_pages = 1 << order;
 	if (nr_pages > SWAP_CLUSTER_MAX)
 		sc.swap_cluster_max = nr_pages;
 	else
@@ -1629,14 +1911,37 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	p->flags |= PF_MEMALLOC;
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
-	shrink_zone(zone, &sc);
+
+	/*
+	 * Free memory by calling shrink zone with increasing priorities
+	 * until we have enough memory freed.
+	 */
+	do {
+		sc.priority--;
+		shrink_zone(zone, &sc);
+
+	} while (sc.nr_reclaimed < nr_pages && sc.priority > 0);
+
+	if (sc.nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) {
+		/*
+		 * shrink_slab does not currently allow us to determine
+		 * how many pages were freed in the zone. So we just
+		 * shake the slab and then go offnode for a single allocation.
+		 *
+		 * shrink_slab will free memory on all zones and may take
+		 * a long time.
+		 */
+		shrink_slab(sc.nr_scanned, gfp_mask, order);
+		sc.nr_reclaimed = 1;	/* Avoid getting the off node timeout */
+	}
+
 	p->reclaim_state = NULL;
 	current->flags &= ~PF_MEMALLOC;
 
 	if (sc.nr_reclaimed == 0)
 		zone->last_unsuccessful_zone_reclaim = jiffies;
 
-	return sc.nr_reclaimed > nr_pages;
+	return sc.nr_reclaimed >= nr_pages;
 }
 #endif
 
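The refill_inactive_zone() hunk above moves the swap-tendency heuristic under the new sc->may_swap check. As a rough standalone sketch of just that arithmetic (not part of the commit; the sample inputs below are purely illustrative), the decision reduces to:

#include <stdio.h>

/* Same formula as in the refill_inactive_zone() hunk; inputs are illustrative. */
static long swap_tendency(long nr_mapped, long total_memory,
			  int prev_priority, long vm_swappiness)
{
	/* `distress': 0 -> no reclaim trouble, 100 -> great trouble */
	long distress = 100 >> prev_priority;
	/* percentage of memory that is mapped into page tables */
	long mapped_ratio = (nr_mapped * 100) / total_memory;

	return mapped_ratio / 2 + distress + vm_swappiness;
}

int main(void)
{
	/* e.g. 40% of memory mapped, prev_priority 6, default vm_swappiness 60 */
	long t = swap_tendency(400, 1000, 6, 60);

	/* reclaim_mapped is set once the tendency reaches 100 */
	printf("swap_tendency = %ld -> reclaim_mapped = %d\n", t, t >= 100);
	return 0;
}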
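The zone_reclaim() hunks replace the fixed scan_control initializer with flags decoded from the zone_reclaim_mode sysctl using the new RECLAIM_* bits. A minimal userspace sketch of that decoding (the bit values are copied from the hunk above; the struct and mode value here are stand-ins, not kernel code):

#include <stdio.h>

#define RECLAIM_OFF   0
#define RECLAIM_ZONE  (1<<0)	/* Run shrink_cache on the zone */
#define RECLAIM_WRITE (1<<1)	/* Writeout pages during reclaim */
#define RECLAIM_SWAP  (1<<2)	/* Swap pages out during reclaim */
#define RECLAIM_SLAB  (1<<3)	/* Do a global slab shrink if the zone is out of memory */

/* Stand-in for the two scan_control fields zone_reclaim() now sets by hand */
struct mode_flags {
	int may_writepage;
	int may_swap;
};

int main(void)
{
	int zone_reclaim_mode = RECLAIM_ZONE | RECLAIM_WRITE;	/* sample sysctl value: 3 */
	struct mode_flags sc;

	/* !! collapses the masked bit to 0 or 1, exactly as in the hunk */
	sc.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE);
	sc.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP);

	printf("may_writepage=%d may_swap=%d\n", sc.may_writepage, sc.may_swap);
	return 0;
}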