Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	343
1 file changed, 312 insertions(+), 31 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2e34b61a70c7..5a610804cd06 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -477,7 +477,13 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 		 * processes. Try to unmap it here.
 		 */
 		if (page_mapped(page) && mapping) {
-			switch (try_to_unmap(page)) {
+			/*
+			 * No unmapping if we do not swap
+			 */
+			if (!sc->may_swap)
+				goto keep_locked;
+
+			switch (try_to_unmap(page, 0)) {
 			case SWAP_FAIL:
 				goto activate_locked;
 			case SWAP_AGAIN:
@@ -492,7 +498,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 				goto keep_locked;
 			if (!may_enter_fs)
 				goto keep_locked;
-			if (laptop_mode && !sc->may_writepage)
+			if (!sc->may_writepage)
 				goto keep_locked;
 
 			/* Page is dirty, try to write it out here */
@@ -609,6 +615,15 @@ int putback_lru_pages(struct list_head *l)
 }
 
 /*
+ * Non migratable page
+ */
+int fail_migrate_page(struct page *newpage, struct page *page)
+{
+	return -EIO;
+}
+EXPORT_SYMBOL(fail_migrate_page);
+
+/*
  * swapout a single page
  * page is locked upon entry, unlocked on exit
  */
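
The new fail_migrate_page() stub gives an address space a way to refuse direct migration outright: every attempt is reported as a permanent failure (-EIO) and the page ends up on the caller's failed list. A minimal sketch of how a filesystem might wire it up follows; the example_aops structure and its elided methods are hypothetical, only fail_migrate_page() itself comes from this patch.

/*
 * Hypothetical sketch: an address space whose pages must never be moved
 * can point ->migratepage at the stub so that migrate_pages() reports
 * -EIO for them instead of attempting a direct migration.
 */
static struct address_space_operations example_aops = {
	/* ... the filesystem's usual readpage/writepage methods ... */
	.migratepage	= fail_migrate_page,
};
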
@@ -617,7 +632,7 @@ static int swap_page(struct page *page)
 	struct address_space *mapping = page_mapping(page);
 
 	if (page_mapped(page) && mapping)
-		if (try_to_unmap(page) != SWAP_SUCCESS)
+		if (try_to_unmap(page, 0) != SWAP_SUCCESS)
 			goto unlock_retry;
 
 	if (PageDirty(page)) {
@@ -653,6 +668,167 @@ unlock_retry:
 retry:
 	return -EAGAIN;
 }
+EXPORT_SYMBOL(swap_page);
+
+/*
+ * Page migration was first developed in the context of the memory hotplug
+ * project. The main authors of the migration code are:
+ *
+ * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
+ * Hirokazu Takahashi <taka@valinux.co.jp>
+ * Dave Hansen <haveblue@us.ibm.com>
+ * Christoph Lameter <clameter@sgi.com>
+ */
+
+/*
+ * Remove references for a page and establish the new page with the correct
+ * basic settings to be able to stop accesses to the page.
+ */
+int migrate_page_remove_references(struct page *newpage,
+				struct page *page, int nr_refs)
+{
+	struct address_space *mapping = page_mapping(page);
+	struct page **radix_pointer;
+
+	/*
+	 * Avoid doing any of the following work if the page count
+	 * indicates that the page is in use or truncate has removed
+	 * the page.
+	 */
+	if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
+		return 1;
+
+	/*
+	 * Establish swap ptes for anonymous pages or destroy pte
+	 * maps for files.
+	 *
+	 * In order to reestablish file backed mappings the fault handlers
+	 * will take the radix tree_lock which may then be used to stop
+	 * processes from accessing this page until the new page is ready.
+	 *
+	 * A process accessing via a swap pte (an anonymous page) will take a
+	 * page_lock on the old page which will block the process until the
+	 * migration attempt is complete. At that time the PageSwapCache bit
+	 * will be examined. If the page was migrated then the PageSwapCache
+	 * bit will be clear and the operation to retrieve the page will be
+	 * retried which will find the new page in the radix tree. Then a new
+	 * direct mapping may be generated based on the radix tree contents.
+	 *
+	 * If the page was not migrated then the PageSwapCache bit
+	 * is still set and the operation may continue.
+	 */
+	try_to_unmap(page, 1);
+
+	/*
+	 * Give up if we were unable to remove all mappings.
+	 */
+	if (page_mapcount(page))
+		return 1;
+
+	write_lock_irq(&mapping->tree_lock);
+
+	radix_pointer = (struct page **)radix_tree_lookup_slot(
+						&mapping->page_tree,
+						page_index(page));
+
+	if (!page_mapping(page) || page_count(page) != nr_refs ||
+			*radix_pointer != page) {
+		write_unlock_irq(&mapping->tree_lock);
+		return 1;
+	}
+
+	/*
+	 * Now we know that no one else is looking at the page.
+	 *
+	 * Certain minimal information about a page must be available
+	 * in order for other subsystems to properly handle the page if they
+	 * find it through the radix tree update before we are finished
+	 * copying the page.
+	 */
+	get_page(newpage);
+	newpage->index = page->index;
+	newpage->mapping = page->mapping;
+	if (PageSwapCache(page)) {
+		SetPageSwapCache(newpage);
+		set_page_private(newpage, page_private(page));
+	}
+
+	*radix_pointer = newpage;
+	__put_page(page);
+	write_unlock_irq(&mapping->tree_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(migrate_page_remove_references);
+
+/*
+ * Copy the page to its new location
+ */
+void migrate_page_copy(struct page *newpage, struct page *page)
+{
+	copy_highpage(newpage, page);
+
+	if (PageError(page))
+		SetPageError(newpage);
+	if (PageReferenced(page))
+		SetPageReferenced(newpage);
+	if (PageUptodate(page))
+		SetPageUptodate(newpage);
+	if (PageActive(page))
+		SetPageActive(newpage);
+	if (PageChecked(page))
+		SetPageChecked(newpage);
+	if (PageMappedToDisk(page))
+		SetPageMappedToDisk(newpage);
+
+	if (PageDirty(page)) {
+		clear_page_dirty_for_io(page);
+		set_page_dirty(newpage);
+	}
+
+	ClearPageSwapCache(page);
+	ClearPageActive(page);
+	ClearPagePrivate(page);
+	set_page_private(page, 0);
+	page->mapping = NULL;
+
+	/*
+	 * If any waiters have accumulated on the new page then
+	 * wake them up.
+	 */
+	if (PageWriteback(newpage))
+		end_page_writeback(newpage);
+}
+EXPORT_SYMBOL(migrate_page_copy);
+
+/*
+ * Common logic to directly migrate a single page suitable for
+ * pages that do not use PagePrivate.
+ *
+ * Pages are locked upon entry and exit.
+ */
+int migrate_page(struct page *newpage, struct page *page)
+{
+	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
+
+	if (migrate_page_remove_references(newpage, page, 2))
+		return -EAGAIN;
+
+	migrate_page_copy(newpage, page);
+
+	/*
+	 * Remove auxiliary swap entries and replace
+	 * them with real ptes.
+	 *
+	 * Note that a real pte entry will allow processes that are not
+	 * waiting on the page lock to use the new page via the page tables
+	 * before the new page is unlocked.
+	 */
+	remove_from_swap(newpage);
+	return 0;
+}
+EXPORT_SYMBOL(migrate_page);
+
 /*
  * migrate_pages
  *
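
migrate_page_remove_references() and migrate_page_copy() are exported separately so a filesystem that needs extra work around the generic steps can build its own ->migratepage callback from them. The sketch below mirrors migrate_page() above; example_migratepage() and the fixup slot are hypothetical, everything else is taken from the helpers in this hunk.

/*
 * Hypothetical filesystem-specific migratepage built from the exported
 * helpers. The flow is the same as migrate_page() above; the slot for
 * fs-private fixups is the only addition.
 */
static int example_migratepage(struct page *newpage, struct page *page)
{
	BUG_ON(PageWriteback(page));	/* caller waits for writeback */

	if (migrate_page_remove_references(newpage, page, 2))
		return -EAGAIN;

	/* ... transfer fs-private metadata from page to newpage here ... */

	migrate_page_copy(newpage, page);
	remove_from_swap(newpage);
	return 0;
}
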
@@ -666,11 +842,6 @@ retry:
  * are movable anymore because to has become empty
  * or no retryable pages exist anymore.
  *
- * SIMPLIFIED VERSION: This implementation of migrate_pages
- * is only swapping out pages and never touches the second
- * list. The direct migration patchset
- * extends this function to avoid the use of swap.
- *
  * Return: Number of pages not migrated when "to" ran empty.
  */
 int migrate_pages(struct list_head *from, struct list_head *to,
@@ -691,6 +862,9 @@ redo:
 	retry = 0;
 
 	list_for_each_entry_safe(page, page2, from, lru) {
+		struct page *newpage = NULL;
+		struct address_space *mapping;
+
 		cond_resched();
 
 		rc = 0;
@@ -698,6 +872,9 @@ redo:
 			/* page was freed from under us. So we are done. */
 			goto next;
 
+		if (to && list_empty(to))
+			break;
+
 		/*
 		 * Skip locked pages during the first two passes to give the
 		 * functions holding the lock time to release the page. Later we
@@ -734,12 +911,69 @@ redo:
 			}
 		}
 
+		if (!to) {
+			rc = swap_page(page);
+			goto next;
+		}
+
+		newpage = lru_to_page(to);
+		lock_page(newpage);
+
 		/*
-		 * Page is properly locked and writeback is complete.
+		 * Pages are properly locked and writeback is complete.
 		 * Try to migrate the page.
 		 */
-		rc = swap_page(page);
-		goto next;
+		mapping = page_mapping(page);
+		if (!mapping)
+			goto unlock_both;
+
+		if (mapping->a_ops->migratepage) {
+			rc = mapping->a_ops->migratepage(newpage, page);
+			goto unlock_both;
+		}
+
+		/*
+		 * Trigger writeout if page is dirty
+		 */
+		if (PageDirty(page)) {
+			switch (pageout(page, mapping)) {
+			case PAGE_KEEP:
+			case PAGE_ACTIVATE:
+				goto unlock_both;
+
+			case PAGE_SUCCESS:
+				unlock_page(newpage);
+				goto next;
+
+			case PAGE_CLEAN:
+				; /* try to migrate the page below */
+			}
+		}
+		/*
+		 * If we have no buffer or can release the buffer
+		 * then do a simple migration.
+		 */
+		if (!page_has_buffers(page) ||
+			try_to_release_page(page, GFP_KERNEL)) {
+			rc = migrate_page(newpage, page);
+			goto unlock_both;
+		}
+
+		/*
+		 * On early passes with mapped pages simply
+		 * retry. There may be a lock held for some
+		 * buffers that may go away. Later
+		 * swap them out.
+		 */
+		if (pass > 4) {
+			unlock_page(newpage);
+			newpage = NULL;
+			rc = swap_page(page);
+			goto next;
+		}
+
+unlock_both:
+		unlock_page(newpage);
 
 unlock_page:
 		unlock_page(page);
@@ -752,7 +986,10 @@ next:
 			list_move(&page->lru, failed);
 			nr_failed++;
 		} else {
-			/* Success */
+			if (newpage) {
+				/* Successful migration. Return page to LRU */
+				move_to_lru(newpage);
+			}
 			list_move(&page->lru, moved);
 		}
 	}
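
With this extension, migrate_pages() consumes one page from the "to" list per migration attempt and only falls back to swap_page() when no target list is given or buffers still cannot be released on a late pass. A usage sketch follows; it assumes the remaining parameters of migrate_pages(), which are not visible in this hunk, are the "moved" and "failed" result lists referenced above, and the allocation loop is illustrative only.

/*
 * Hypothetical caller: migrate a batch of pages already isolated from
 * the LRU (on "from") to freshly allocated pages on node "nid".
 * Assumes migrate_pages(from, to, moved, failed); disposal of the
 * result lists (e.g. via putback_lru_pages()) is left to the caller
 * and is not shown in this hunk.
 */
static int example_migrate_batch(struct list_head *from, int nr, int nid)
{
	LIST_HEAD(to);
	LIST_HEAD(moved);
	LIST_HEAD(failed);
	int i;

	for (i = 0; i < nr; i++) {
		struct page *newpage = alloc_pages_node(nid, GFP_HIGHUSER, 0);

		if (!newpage)
			break;
		list_add_tail(&newpage->lru, &to);
	}

	/* Pages that could not be migrated end up on "failed". */
	return migrate_pages(from, &to, &moved, &failed);
}
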
@@ -1170,7 +1407,7 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 	int i;
 
 	sc.gfp_mask = gfp_mask;
-	sc.may_writepage = 0;
+	sc.may_writepage = !laptop_mode;
 	sc.may_swap = 1;
 
 	inc_page_state(allocstall);
@@ -1273,7 +1510,7 @@ loop_again:
 	total_scanned = 0;
 	total_reclaimed = 0;
 	sc.gfp_mask = GFP_KERNEL;
-	sc.may_writepage = 0;
+	sc.may_writepage = !laptop_mode;
 	sc.may_swap = 1;
 	sc.nr_mapped = read_page_state(nr_mapped);
 
@@ -1586,40 +1823,61 @@ module_init(kswapd_init)
  */
 int zone_reclaim_mode __read_mostly;
 
+#define RECLAIM_OFF 0
+#define RECLAIM_ZONE (1<<0)	/* Run shrink_cache on the zone */
+#define RECLAIM_WRITE (1<<1)	/* Writeout pages during reclaim */
+#define RECLAIM_SWAP (1<<2)	/* Swap pages out during reclaim */
+#define RECLAIM_SLAB (1<<3)	/* Do a global slab shrink if the zone is out of memory */
+
 /*
  * Minimum time between zone reclaim scans
  */
-#define ZONE_RECLAIM_INTERVAL HZ/2
+int zone_reclaim_interval __read_mostly = 30*HZ;
+
+/*
+ * Priority for ZONE_RECLAIM. This determines the fraction of pages
+ * of a node considered for each zone_reclaim. 4 scans 1/16th of
+ * a zone.
+ */
+#define ZONE_RECLAIM_PRIORITY 4
+
 /*
  * Try to free up some pages from this zone through reclaim.
  */
 int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 {
-	int nr_pages = 1 << order;
+	int nr_pages;
 	struct task_struct *p = current;
 	struct reclaim_state reclaim_state;
-	struct scan_control sc = {
-		.gfp_mask = gfp_mask,
-		.may_writepage = 0,
-		.may_swap = 0,
-		.nr_mapped = read_page_state(nr_mapped),
-		.nr_scanned = 0,
-		.nr_reclaimed = 0,
-		.priority = 0
-	};
+	struct scan_control sc;
+	cpumask_t mask;
+	int node_id;
+
+	if (time_before(jiffies,
+		zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval))
+			return 0;
 
 	if (!(gfp_mask & __GFP_WAIT) ||
-		zone->zone_pgdat->node_id != numa_node_id() ||
 		zone->all_unreclaimable ||
 		atomic_read(&zone->reclaim_in_progress) > 0)
 			return 0;
 
-	if (time_before(jiffies,
-		zone->last_unsuccessful_zone_reclaim + ZONE_RECLAIM_INTERVAL))
-			return 0;
+	node_id = zone->zone_pgdat->node_id;
+	mask = node_to_cpumask(node_id);
+	if (!cpus_empty(mask) && node_id != numa_node_id())
+		return 0;
+
+	sc.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE);
+	sc.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP);
+	sc.nr_scanned = 0;
+	sc.nr_reclaimed = 0;
+	sc.priority = ZONE_RECLAIM_PRIORITY + 1;
+	sc.nr_mapped = read_page_state(nr_mapped);
+	sc.gfp_mask = gfp_mask;
 
 	disable_swap_token();
 
+	nr_pages = 1 << order;
 	if (nr_pages > SWAP_CLUSTER_MAX)
 		sc.swap_cluster_max = nr_pages;
 	else
@@ -1629,14 +1887,37 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	p->flags |= PF_MEMALLOC;
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
-	shrink_zone(zone, &sc);
+
+	/*
+	 * Free memory by calling shrink zone with increasing priorities
+	 * until we have enough memory freed.
+	 */
+	do {
+		sc.priority--;
+		shrink_zone(zone, &sc);
+
+	} while (sc.nr_reclaimed < nr_pages && sc.priority > 0);
+
+	if (sc.nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) {
+		/*
+		 * shrink_slab does not currently allow us to determine
+		 * how many pages were freed in the zone. So we just
+		 * shake the slab and then go offnode for a single allocation.
+		 *
+		 * shrink_slab will free memory on all zones and may take
+		 * a long time.
+		 */
+		shrink_slab(sc.nr_scanned, gfp_mask, order);
+		sc.nr_reclaimed = 1;	/* Avoid getting the off node timeout */
+	}
+
 	p->reclaim_state = NULL;
 	current->flags &= ~PF_MEMALLOC;
 
 	if (sc.nr_reclaimed == 0)
 		zone->last_unsuccessful_zone_reclaim = jiffies;
 
-	return sc.nr_reclaimed > nr_pages;
+	return sc.nr_reclaimed >= nr_pages;
 }
 #endif
 
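
The RECLAIM_* bits map directly onto the scan_control knobs that zone_reclaim() sets up, so the mode value selects how aggressive local reclaim may be; for example, a zone_reclaim_mode of 7 (RECLAIM_ZONE | RECLAIM_WRITE | RECLAIM_SWAP) permits both writeout and swap before an allocation goes off node. A small sketch of that mapping follows; the /proc/sys/vm/zone_reclaim_mode path for setting the value is an assumption about the wider patchset, not something shown in this diff.

/*
 * Hypothetical helper showing how the mode bits drive the scan_control
 * used by zone_reclaim(). "mode" stands for the current value of
 * zone_reclaim_mode (commonly tuned via /proc/sys/vm/zone_reclaim_mode,
 * which is assumed here rather than shown in this diff).
 */
static void example_apply_reclaim_mode(struct scan_control *sc, int mode)
{
	sc->may_writepage = !!(mode & RECLAIM_WRITE);	/* allow pageout() of dirty pages */
	sc->may_swap = !!(mode & RECLAIM_SWAP);		/* allow unmapping and swap */
	/* RECLAIM_SLAB additionally triggers a global shrink_slab() pass */
}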