Diffstat (limited to 'mm/vmscan.c')

 mm/vmscan.c | 343
 1 file changed, 312 insertions(+), 31 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2e34b61a70c7..5a610804cd06 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -477,7 +477,13 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 		 * processes. Try to unmap it here.
 		 */
 		if (page_mapped(page) && mapping) {
-			switch (try_to_unmap(page)) {
+			/*
+			 * No unmapping if we do not swap
+			 */
+			if (!sc->may_swap)
+				goto keep_locked;
+
+			switch (try_to_unmap(page, 0)) {
 			case SWAP_FAIL:
 				goto activate_locked;
 			case SWAP_AGAIN:
@@ -492,7 +498,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 				goto keep_locked;
 			if (!may_enter_fs)
 				goto keep_locked;
-			if (laptop_mode && !sc->may_writepage)
+			if (!sc->may_writepage)
 				goto keep_locked;
 
 			/* Page is dirty, try to write it out here */
@@ -609,6 +615,15 @@ int putback_lru_pages(struct list_head *l)
 }
 
 /*
+ * Non migratable page
+ */
+int fail_migrate_page(struct page *newpage, struct page *page)
+{
+	return -EIO;
+}
+EXPORT_SYMBOL(fail_migrate_page);
+
+/*
  * swapout a single page
  * page is locked upon entry, unlocked on exit
  */
@@ -617,7 +632,7 @@ static int swap_page(struct page *page)
 	struct address_space *mapping = page_mapping(page);
 
 	if (page_mapped(page) && mapping)
-		if (try_to_unmap(page) != SWAP_SUCCESS)
+		if (try_to_unmap(page, 0) != SWAP_SUCCESS)
 			goto unlock_retry;
 
 	if (PageDirty(page)) {
@@ -653,6 +668,167 @@ unlock_retry:
 retry:
 	return -EAGAIN;
 }
+EXPORT_SYMBOL(swap_page);
+
+/*
+ * Page migration was first developed in the context of the memory hotplug
+ * project. The main authors of the migration code are:
+ *
+ * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
+ * Hirokazu Takahashi <taka@valinux.co.jp>
+ * Dave Hansen <haveblue@us.ibm.com>
+ * Christoph Lameter <clameter@sgi.com>
+ */
+
+/*
+ * Remove references for a page and establish the new page with the correct
+ * basic settings to be able to stop accesses to the page.
+ */
+int migrate_page_remove_references(struct page *newpage,
+				struct page *page, int nr_refs)
+{
+	struct address_space *mapping = page_mapping(page);
+	struct page **radix_pointer;
+
+	/*
+	 * Avoid doing any of the following work if the page count
+	 * indicates that the page is in use or truncate has removed
+	 * the page.
+	 */
+	if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
+		return 1;
+
+	/*
+	 * Establish swap ptes for anonymous pages or destroy pte
+	 * maps for files.
+	 *
+	 * In order to reestablish file backed mappings the fault handlers
+	 * will take the radix tree_lock which may then be used to stop
+	 * processses from accessing this page until the new page is ready.
+	 *
+	 * A process accessing via a swap pte (an anonymous page) will take a
+	 * page_lock on the old page which will block the process until the
+	 * migration attempt is complete. At that time the PageSwapCache bit
+	 * will be examined. If the page was migrated then the PageSwapCache
+	 * bit will be clear and the operation to retrieve the page will be
+	 * retried which will find the new page in the radix tree. Then a new
+	 * direct mapping may be generated based on the radix tree contents.
+	 *
+	 * If the page was not migrated then the PageSwapCache bit
+	 * is still set and the operation may continue.
+	 */
+	try_to_unmap(page, 1);
+
+	/*
+	 * Give up if we were unable to remove all mappings.
+	 */
+	if (page_mapcount(page))
+		return 1;
+
+	write_lock_irq(&mapping->tree_lock);
+
+	radix_pointer = (struct page **)radix_tree_lookup_slot(
+						&mapping->page_tree,
+						page_index(page));
+
+	if (!page_mapping(page) || page_count(page) != nr_refs ||
+			*radix_pointer != page) {
+		write_unlock_irq(&mapping->tree_lock);
+		return 1;
+	}
+
+	/*
+	 * Now we know that no one else is looking at the page.
+	 *
+	 * Certain minimal information about a page must be available
+	 * in order for other subsystems to properly handle the page if they
+	 * find it through the radix tree update before we are finished
+	 * copying the page.
+	 */
+	get_page(newpage);
+	newpage->index = page->index;
+	newpage->mapping = page->mapping;
+	if (PageSwapCache(page)) {
+		SetPageSwapCache(newpage);
+		set_page_private(newpage, page_private(page));
+	}
+
+	*radix_pointer = newpage;
+	__put_page(page);
+	write_unlock_irq(&mapping->tree_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(migrate_page_remove_references);
+
+/*
+ * Copy the page to its new location
+ */
+void migrate_page_copy(struct page *newpage, struct page *page)
+{
+	copy_highpage(newpage, page);
+
+	if (PageError(page))
+		SetPageError(newpage);
+	if (PageReferenced(page))
+		SetPageReferenced(newpage);
+	if (PageUptodate(page))
+		SetPageUptodate(newpage);
+	if (PageActive(page))
+		SetPageActive(newpage);
+	if (PageChecked(page))
+		SetPageChecked(newpage);
+	if (PageMappedToDisk(page))
+		SetPageMappedToDisk(newpage);
+
+	if (PageDirty(page)) {
+		clear_page_dirty_for_io(page);
+		set_page_dirty(newpage);
+	}
+
+	ClearPageSwapCache(page);
+	ClearPageActive(page);
+	ClearPagePrivate(page);
+	set_page_private(page, 0);
+	page->mapping = NULL;
+
+	/*
+	 * If any waiters have accumulated on the new page then
+	 * wake them up.
+	 */
+	if (PageWriteback(newpage))
+		end_page_writeback(newpage);
+}
+EXPORT_SYMBOL(migrate_page_copy);
+
+/*
+ * Common logic to directly migrate a single page suitable for
+ * pages that do not use PagePrivate.
+ *
+ * Pages are locked upon entry and exit.
+ */
+int migrate_page(struct page *newpage, struct page *page)
+{
+	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
+
+	if (migrate_page_remove_references(newpage, page, 2))
+		return -EAGAIN;
+
+	migrate_page_copy(newpage, page);
+
+	/*
+	 * Remove auxiliary swap entries and replace
+	 * them with real ptes.
+	 *
+	 * Note that a real pte entry will allow processes that are not
+	 * waiting on the page lock to use the new page via the page tables
+	 * before the new page is unlocked.
+	 */
+	remove_from_swap(newpage);
+	return 0;
+}
+EXPORT_SYMBOL(migrate_page);
+
 /*
  * migrate_pages
  *
@@ -666,11 +842,6 @@ retry:
  * are movable anymore because to has become empty
  * or no retryable pages exist anymore.
  *
- * SIMPLIFIED VERSION: This implementation of migrate_pages
- * is only swapping out pages and never touches the second
- * list. The direct migration patchset
- * extends this function to avoid the use of swap.
- *
  * Return: Number of pages not migrated when "to" ran empty.
  */
 int migrate_pages(struct list_head *from, struct list_head *to,
@@ -691,6 +862,9 @@ redo:
 	retry = 0;
 
 	list_for_each_entry_safe(page, page2, from, lru) {
+		struct page *newpage = NULL;
+		struct address_space *mapping;
+
 		cond_resched();
 
 		rc = 0;
@@ -698,6 +872,9 @@ redo:
 			/* page was freed from under us. So we are done. */
 			goto next;
 
+		if (to && list_empty(to))
+			break;
+
 		/*
 		 * Skip locked pages during the first two passes to give the
 		 * functions holding the lock time to release the page. Later we
@@ -734,12 +911,69 @@ redo:
 			}
 		}
 
+		if (!to) {
+			rc = swap_page(page);
+			goto next;
+		}
+
+		newpage = lru_to_page(to);
+		lock_page(newpage);
+
 		/*
-		 * Page is properly locked and writeback is complete.
+		 * Pages are properly locked and writeback is complete.
 		 * Try to migrate the page.
 		 */
-		rc = swap_page(page);
-		goto next;
+		mapping = page_mapping(page);
+		if (!mapping)
+			goto unlock_both;
+
+		if (mapping->a_ops->migratepage) {
+			rc = mapping->a_ops->migratepage(newpage, page);
+			goto unlock_both;
+		}
+
+		/*
+		 * Trigger writeout if page is dirty
+		 */
+		if (PageDirty(page)) {
+			switch (pageout(page, mapping)) {
+			case PAGE_KEEP:
+			case PAGE_ACTIVATE:
+				goto unlock_both;
+
+			case PAGE_SUCCESS:
+				unlock_page(newpage);
+				goto next;
+
+			case PAGE_CLEAN:
+				; /* try to migrate the page below */
+			}
+		}
+		/*
+		 * If we have no buffer or can release the buffer
+		 * then do a simple migration.
+		 */
+		if (!page_has_buffers(page) ||
+				try_to_release_page(page, GFP_KERNEL)) {
+			rc = migrate_page(newpage, page);
+			goto unlock_both;
+		}
+
+		/*
+		 * On early passes with mapped pages simply
+		 * retry. There may be a lock held for some
+		 * buffers that may go away. Later
+		 * swap them out.
+		 */
+		if (pass > 4) {
+			unlock_page(newpage);
+			newpage = NULL;
+			rc = swap_page(page);
+			goto next;
+		}
+
+unlock_both:
+		unlock_page(newpage);
 
 unlock_page:
 		unlock_page(page);
@@ -752,7 +986,10 @@ next:
 			list_move(&page->lru, failed);
 			nr_failed++;
 		} else {
-			/* Success */
+			if (newpage) {
+				/* Successful migration. Return page to LRU */
+				move_to_lru(newpage);
+			}
 			list_move(&page->lru, moved);
 		}
 	}
@@ -1170,7 +1407,7 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 	int i;
 
 	sc.gfp_mask = gfp_mask;
-	sc.may_writepage = 0;
+	sc.may_writepage = !laptop_mode;
 	sc.may_swap = 1;
 
 	inc_page_state(allocstall);
@@ -1273,7 +1510,7 @@ loop_again:
 	total_scanned = 0;
 	total_reclaimed = 0;
 	sc.gfp_mask = GFP_KERNEL;
-	sc.may_writepage = 0;
+	sc.may_writepage = !laptop_mode;
 	sc.may_swap = 1;
 	sc.nr_mapped = read_page_state(nr_mapped);
 
@@ -1586,40 +1823,61 @@ module_init(kswapd_init)
  */
 int zone_reclaim_mode __read_mostly;
 
+#define RECLAIM_OFF 0
+#define RECLAIM_ZONE (1<<0)	/* Run shrink_cache on the zone */
+#define RECLAIM_WRITE (1<<1)	/* Writeout pages during reclaim */
+#define RECLAIM_SWAP (1<<2)	/* Swap pages out during reclaim */
+#define RECLAIM_SLAB (1<<3)	/* Do a global slab shrink if the zone is out of memory */
+
 /*
  * Mininum time between zone reclaim scans
  */
-#define ZONE_RECLAIM_INTERVAL HZ/2
+int zone_reclaim_interval __read_mostly = 30*HZ;
+
+/*
+ * Priority for ZONE_RECLAIM. This determines the fraction of pages
+ * of a node considered for each zone_reclaim. 4 scans 1/16th of
+ * a zone.
+ */
+#define ZONE_RECLAIM_PRIORITY 4
+
 /*
  * Try to free up some pages from this zone through reclaim.
  */
 int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 {
-	int nr_pages = 1 << order;
+	int nr_pages;
 	struct task_struct *p = current;
 	struct reclaim_state reclaim_state;
-	struct scan_control sc = {
-		.gfp_mask = gfp_mask,
-		.may_writepage = 0,
-		.may_swap = 0,
-		.nr_mapped = read_page_state(nr_mapped),
-		.nr_scanned = 0,
-		.nr_reclaimed = 0,
-		.priority = 0
-	};
+	struct scan_control sc;
+	cpumask_t mask;
+	int node_id;
+
+	if (time_before(jiffies,
+		zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval))
+			return 0;
 
 	if (!(gfp_mask & __GFP_WAIT) ||
-		zone->zone_pgdat->node_id != numa_node_id() ||
 		zone->all_unreclaimable ||
 		atomic_read(&zone->reclaim_in_progress) > 0)
 			return 0;
 
-	if (time_before(jiffies,
-		zone->last_unsuccessful_zone_reclaim + ZONE_RECLAIM_INTERVAL))
-		return 0;
+	node_id = zone->zone_pgdat->node_id;
+	mask = node_to_cpumask(node_id);
+	if (!cpus_empty(mask) && node_id != numa_node_id())
+		return 0;
+
+	sc.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE);
+	sc.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP);
+	sc.nr_scanned = 0;
+	sc.nr_reclaimed = 0;
+	sc.priority = ZONE_RECLAIM_PRIORITY + 1;
+	sc.nr_mapped = read_page_state(nr_mapped);
+	sc.gfp_mask = gfp_mask;
 
 	disable_swap_token();
 
+	nr_pages = 1 << order;
 	if (nr_pages > SWAP_CLUSTER_MAX)
 		sc.swap_cluster_max = nr_pages;
 	else
@@ -1629,14 +1887,37 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
 	p->flags |= PF_MEMALLOC;
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
-	shrink_zone(zone, &sc);
+
+	/*
+	 * Free memory by calling shrink zone with increasing priorities
+	 * until we have enough memory freed.
+	 */
+	do {
+		sc.priority--;
+		shrink_zone(zone, &sc);
+
+	} while (sc.nr_reclaimed < nr_pages && sc.priority > 0);
+
+	if (sc.nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) {
+		/*
+		 * shrink_slab does not currently allow us to determine
+		 * how many pages were freed in the zone. So we just
+		 * shake the slab and then go offnode for a single allocation.
+		 *
+		 * shrink_slab will free memory on all zones and may take
+		 * a long time.
+		 */
+		shrink_slab(sc.nr_scanned, gfp_mask, order);
+		sc.nr_reclaimed = 1;	/* Avoid getting the off node timeout */
+	}
+
 	p->reclaim_state = NULL;
 	current->flags &= ~PF_MEMALLOC;
 
 	if (sc.nr_reclaimed == 0)
 		zone->last_unsuccessful_zone_reclaim = jiffies;
 
-	return sc.nr_reclaimed > nr_pages;
+	return sc.nr_reclaimed >= nr_pages;
 }
 #endif
 
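The hunks above export fail_migrate_page(), migrate_page_remove_references(), migrate_page_copy() and migrate_page(), and teach migrate_pages() to prefer a per-mapping mapping->a_ops->migratepage(newpage, page) hook before falling back to writeout or swap. As a hedged illustration only (not part of this diff), a filesystem whose pages carry no PagePrivate or buffer state could point that hook at the generic helper, while a mapping that must never have its pages relocated could install fail_migrate_page(). The migratepage member of address_space_operations and the declarations of these helpers in a header such as <linux/swap.h> are assumed to come from companion patches in the series.

/*
 * Illustrative sketch only; assumes the companion patches add the
 * address_space_operations.migratepage member used by migrate_pages()
 * above and declare migrate_page()/fail_migrate_page() in <linux/swap.h>.
 */
#include <linux/fs.h>
#include <linux/swap.h>

/* Pages with no private/buffer state: reuse the generic helper. */
static struct address_space_operations example_direct_aops = {
	.migratepage	= migrate_page,
};

/*
 * Pages that must never be relocated: returning -EIO from the hook makes
 * migrate_pages() take the unlock_both path with a non-zero, non-EAGAIN
 * rc, so the page is moved to the "failed" list instead of being retried
 * or swapped out.
 */
static struct address_space_operations example_pinned_aops = {
	.migratepage	= fail_migrate_page,
};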
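The zone_reclaim() rework defines the RECLAIM_* bit flags above and consults them when building its scan_control: RECLAIM_WRITE enables writeout, RECLAIM_SWAP enables unmapping and swapping, and RECLAIM_SLAB adds a global slab shrink when the zone is still short. A minimal standalone sketch of how those bits compose is below; the flag names mirror the defines in the hunk, while the idea that zone_reclaim_mode is exposed as a tunable (e.g. via sysctl) is an assumption about other patches in the series, not something shown in this diff.

/* Standalone demonstration of the bit semantics; not kernel code. */
#include <stdio.h>

#define RECLAIM_OFF   0
#define RECLAIM_ZONE  (1 << 0)	/* run shrink_cache on the zone's LRU */
#define RECLAIM_WRITE (1 << 1)	/* also write out dirty pages */
#define RECLAIM_SWAP  (1 << 2)	/* also unmap and swap pages out */
#define RECLAIM_SLAB  (1 << 3)	/* also shrink slab caches if still short */

int main(void)
{
	/* e.g. zone reclaim with writeout but without swapping */
	int zone_reclaim_mode = RECLAIM_ZONE | RECLAIM_WRITE;

	/* mirrors how zone_reclaim() fills its scan_control above */
	int may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE);
	int may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP);

	printf("mode=%d may_writepage=%d may_swap=%d\n",
	       zone_reclaim_mode, may_writepage, may_swap);
	return 0;
}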