author	Christoph Lameter <clameter@sgi.com>	2006-03-22 03:09:12 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-03-22 10:54:06 -0500
commit	b20a35035f983f4ac7e29c4a68f30e43510007e0 (patch)
tree	fdf090ddddbcc275349f62f71adc98649e2c683b /mm/vmscan.c
parent	442295c94bf650221af3ef20fc68fa3e93876818 (diff)
[PATCH] page migration reorg
Centralize the page migration functions in anticipation of additional
tinkering.  Creates a new file mm/migrate.c

1. Extract buffer_migrate_page() from fs/buffer.c
2. Extract central migration code from vmscan.c
3. Extract some components from mempolicy.c
4. Export pageout() and remove_from_swap() from vmscan.c
5. Make it possible to configure NUMA systems without page migration
   and non-NUMA systems with page migration.

I had to do some #ifdeffing in mempolicy.c that may need a cleanup.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--	mm/vmscan.c	491
1 files changed, 2 insertions, 489 deletions
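
Editorial note, for orientation before the diff: the hunks below remove the migration core from mm/vmscan.c and drop the "static" from pageout() and remove_mapping() so the new mm/migrate.c can call them. The following is a minimal sketch of the prototypes involved, with signatures copied from the code removed below; the grouping into a single declaration block and the !CONFIG_MIGRATION fallback stubs are illustrative assumptions of this note, not something shown in this diff (which is limited to mm/vmscan.c).

/*
 * Sketch only (assumes the usual mm context, e.g. <linux/mm.h> and
 * <linux/swap.h>).  Signatures are copied from the code removed below;
 * the #else stubs are illustrative, not part of this patch.
 */

/* possible outcome of pageout(), as removed from vmscan.c below */
typedef enum {
	PAGE_KEEP,	/* failed to write page out, page is locked */
	PAGE_ACTIVATE,	/* move page to the active list, page is locked */
	PAGE_SUCCESS,	/* page has been sent to the disk, page is unlocked */
	PAGE_CLEAN,	/* page is clean and locked */
} pageout_t;

#ifdef CONFIG_MIGRATION
/* pageout() and remove_mapping() lose "static" so mm/migrate.c can call them */
extern pageout_t pageout(struct page *page, struct address_space *mapping);
extern int remove_mapping(struct address_space *mapping, struct page *page);

/* the migration core moved out of vmscan.c */
extern int isolate_lru_page(struct page *page);
extern unsigned long putback_lru_pages(struct list_head *l);
extern unsigned long migrate_pages(struct list_head *from, struct list_head *to,
		struct list_head *moved, struct list_head *failed);
extern int migrate_page(struct page *newpage, struct page *page);
extern void migrate_page_copy(struct page *newpage, struct page *page);
extern int migrate_page_remove_references(struct page *newpage,
		struct page *page, int nr_refs);
extern int fail_migrate_page(struct page *newpage, struct page *page);
#else
/* illustrative no-op fallbacks so migration-less configurations still build */
static inline int isolate_lru_page(struct page *page) { return 0; }
static inline unsigned long putback_lru_pages(struct list_head *l) { return 0; }
static inline unsigned long migrate_pages(struct list_head *from,
		struct list_head *to, struct list_head *moved,
		struct list_head *failed) { return 0; }
#endif

With migration behind its own config option, NUMA kernels can be built without the migration core and non-NUMA kernels with it, which is point 5 of the commit message above.
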
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 548e023c193b..fd572bbdc9f5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -42,18 +42,6 @@
 
 #include "internal.h"
 
-/* possible outcome of pageout() */
-typedef enum {
-	/* failed to write page out, page is locked */
-	PAGE_KEEP,
-	/* move page to the active list, page is locked */
-	PAGE_ACTIVATE,
-	/* page has been sent to the disk successfully, page is unlocked */
-	PAGE_SUCCESS,
-	/* page is clean and locked */
-	PAGE_CLEAN,
-} pageout_t;
-
 struct scan_control {
 	/* Incremented by the number of inactive pages that were scanned */
 	unsigned long nr_scanned;
@@ -304,7 +292,7 @@ static void handle_write_error(struct address_space *mapping,
  * pageout is called by shrink_page_list() for each dirty page.
  * Calls ->writepage().
  */
-static pageout_t pageout(struct page *page, struct address_space *mapping)
+pageout_t pageout(struct page *page, struct address_space *mapping)
 {
 	/*
 	 * If the page is dirty, only perform writeback if that write
@@ -372,7 +360,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
 	return PAGE_CLEAN;
 }
 
-static int remove_mapping(struct address_space *mapping, struct page *page)
+int remove_mapping(struct address_space *mapping, struct page *page)
 {
 	if (!mapping)
 		return 0;	/* truncate got there first */
@@ -570,481 +558,6 @@ keep:
 	return nr_reclaimed;
 }
 
-#ifdef CONFIG_MIGRATION
-static inline void move_to_lru(struct page *page)
-{
-	list_del(&page->lru);
-	if (PageActive(page)) {
-		/*
-		 * lru_cache_add_active checks that
-		 * the PG_active bit is off.
-		 */
-		ClearPageActive(page);
-		lru_cache_add_active(page);
-	} else {
-		lru_cache_add(page);
-	}
-	put_page(page);
-}
-
-/*
- * Add isolated pages on the list back to the LRU.
- *
- * returns the number of pages put back.
- */
-unsigned long putback_lru_pages(struct list_head *l)
-{
-	struct page *page;
-	struct page *page2;
-	unsigned long count = 0;
-
-	list_for_each_entry_safe(page, page2, l, lru) {
-		move_to_lru(page);
-		count++;
-	}
-	return count;
-}
-
-/*
- * Non migratable page
- */
-int fail_migrate_page(struct page *newpage, struct page *page)
-{
-	return -EIO;
-}
-EXPORT_SYMBOL(fail_migrate_page);
-
-/*
- * swapout a single page
- * page is locked upon entry, unlocked on exit
- */
-static int swap_page(struct page *page)
-{
-	struct address_space *mapping = page_mapping(page);
-
-	if (page_mapped(page) && mapping)
-		if (try_to_unmap(page, 1) != SWAP_SUCCESS)
-			goto unlock_retry;
-
-	if (PageDirty(page)) {
-		/* Page is dirty, try to write it out here */
-		switch(pageout(page, mapping)) {
-		case PAGE_KEEP:
-		case PAGE_ACTIVATE:
-			goto unlock_retry;
-
-		case PAGE_SUCCESS:
-			goto retry;
-
-		case PAGE_CLEAN:
-			; /* try to free the page below */
-		}
-	}
-
-	if (PagePrivate(page)) {
-		if (!try_to_release_page(page, GFP_KERNEL) ||
-		    (!mapping && page_count(page) == 1))
-			goto unlock_retry;
-	}
-
-	if (remove_mapping(mapping, page)) {
-		/* Success */
-		unlock_page(page);
-		return 0;
-	}
-
-unlock_retry:
-	unlock_page(page);
-
-retry:
-	return -EAGAIN;
-}
-EXPORT_SYMBOL(swap_page);
-
-/*
- * Page migration was first developed in the context of the memory hotplug
- * project. The main authors of the migration code are:
- *
- * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
- * Hirokazu Takahashi <taka@valinux.co.jp>
- * Dave Hansen <haveblue@us.ibm.com>
- * Christoph Lameter <clameter@sgi.com>
- */
-
-/*
- * Remove references for a page and establish the new page with the correct
- * basic settings to be able to stop accesses to the page.
- */
-int migrate_page_remove_references(struct page *newpage,
-				struct page *page, int nr_refs)
-{
-	struct address_space *mapping = page_mapping(page);
-	struct page **radix_pointer;
-
-	/*
-	 * Avoid doing any of the following work if the page count
-	 * indicates that the page is in use or truncate has removed
-	 * the page.
-	 */
-	if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
-		return -EAGAIN;
-
-	/*
-	 * Establish swap ptes for anonymous pages or destroy pte
-	 * maps for files.
-	 *
-	 * In order to reestablish file backed mappings the fault handlers
-	 * will take the radix tree_lock which may then be used to stop
-	 * processses from accessing this page until the new page is ready.
-	 *
-	 * A process accessing via a swap pte (an anonymous page) will take a
-	 * page_lock on the old page which will block the process until the
-	 * migration attempt is complete. At that time the PageSwapCache bit
-	 * will be examined. If the page was migrated then the PageSwapCache
-	 * bit will be clear and the operation to retrieve the page will be
-	 * retried which will find the new page in the radix tree. Then a new
-	 * direct mapping may be generated based on the radix tree contents.
-	 *
-	 * If the page was not migrated then the PageSwapCache bit
-	 * is still set and the operation may continue.
-	 */
-	if (try_to_unmap(page, 1) == SWAP_FAIL)
-		/* A vma has VM_LOCKED set -> Permanent failure */
-		return -EPERM;
-
-	/*
-	 * Give up if we were unable to remove all mappings.
-	 */
-	if (page_mapcount(page))
-		return -EAGAIN;
-
-	write_lock_irq(&mapping->tree_lock);
-
-	radix_pointer = (struct page **)radix_tree_lookup_slot(
-						&mapping->page_tree,
-						page_index(page));
-
-	if (!page_mapping(page) || page_count(page) != nr_refs ||
-			*radix_pointer != page) {
-		write_unlock_irq(&mapping->tree_lock);
-		return -EAGAIN;
-	}
-
-	/*
-	 * Now we know that no one else is looking at the page.
-	 *
-	 * Certain minimal information about a page must be available
-	 * in order for other subsystems to properly handle the page if they
-	 * find it through the radix tree update before we are finished
-	 * copying the page.
-	 */
-	get_page(newpage);
-	newpage->index = page->index;
-	newpage->mapping = page->mapping;
-	if (PageSwapCache(page)) {
-		SetPageSwapCache(newpage);
-		set_page_private(newpage, page_private(page));
-	}
-
-	*radix_pointer = newpage;
-	__put_page(page);
-	write_unlock_irq(&mapping->tree_lock);
-
-	return 0;
-}
-EXPORT_SYMBOL(migrate_page_remove_references);
-
-/*
- * Copy the page to its new location
- */
-void migrate_page_copy(struct page *newpage, struct page *page)
-{
-	copy_highpage(newpage, page);
-
-	if (PageError(page))
-		SetPageError(newpage);
-	if (PageReferenced(page))
-		SetPageReferenced(newpage);
-	if (PageUptodate(page))
-		SetPageUptodate(newpage);
-	if (PageActive(page))
-		SetPageActive(newpage);
-	if (PageChecked(page))
-		SetPageChecked(newpage);
-	if (PageMappedToDisk(page))
-		SetPageMappedToDisk(newpage);
-
-	if (PageDirty(page)) {
-		clear_page_dirty_for_io(page);
-		set_page_dirty(newpage);
-	}
-
-	ClearPageSwapCache(page);
-	ClearPageActive(page);
-	ClearPagePrivate(page);
-	set_page_private(page, 0);
-	page->mapping = NULL;
-
-	/*
-	 * If any waiters have accumulated on the new page then
-	 * wake them up.
-	 */
-	if (PageWriteback(newpage))
-		end_page_writeback(newpage);
-}
-EXPORT_SYMBOL(migrate_page_copy);
-
-/*
- * Common logic to directly migrate a single page suitable for
- * pages that do not use PagePrivate.
- *
- * Pages are locked upon entry and exit.
- */
-int migrate_page(struct page *newpage, struct page *page)
-{
-	int rc;
-
-	BUG_ON(PageWriteback(page));	/* Writeback must be complete */
-
-	rc = migrate_page_remove_references(newpage, page, 2);
-
-	if (rc)
-		return rc;
-
-	migrate_page_copy(newpage, page);
-
-	/*
-	 * Remove auxiliary swap entries and replace
-	 * them with real ptes.
-	 *
-	 * Note that a real pte entry will allow processes that are not
-	 * waiting on the page lock to use the new page via the page tables
-	 * before the new page is unlocked.
-	 */
-	remove_from_swap(newpage);
-	return 0;
-}
-EXPORT_SYMBOL(migrate_page);
-
-/*
- * migrate_pages
- *
- * Two lists are passed to this function. The first list
- * contains the pages isolated from the LRU to be migrated.
- * The second list contains new pages that the pages isolated
- * can be moved to. If the second list is NULL then all
- * pages are swapped out.
- *
- * The function returns after 10 attempts or if no pages
- * are movable anymore because to has become empty
- * or no retryable pages exist anymore.
- *
- * Return: Number of pages not migrated when "to" ran empty.
- */
-unsigned long migrate_pages(struct list_head *from, struct list_head *to,
-		  struct list_head *moved, struct list_head *failed)
-{
-	unsigned long retry;
-	unsigned long nr_failed = 0;
-	int pass = 0;
-	struct page *page;
-	struct page *page2;
-	int swapwrite = current->flags & PF_SWAPWRITE;
-	int rc;
-
-	if (!swapwrite)
-		current->flags |= PF_SWAPWRITE;
-
-redo:
-	retry = 0;
-
-	list_for_each_entry_safe(page, page2, from, lru) {
-		struct page *newpage = NULL;
-		struct address_space *mapping;
-
-		cond_resched();
-
-		rc = 0;
-		if (page_count(page) == 1)
-			/* page was freed from under us. So we are done. */
-			goto next;
-
-		if (to && list_empty(to))
-			break;
-
-		/*
-		 * Skip locked pages during the first two passes to give the
-		 * functions holding the lock time to release the page. Later we
-		 * use lock_page() to have a higher chance of acquiring the
-		 * lock.
-		 */
-		rc = -EAGAIN;
-		if (pass > 2)
-			lock_page(page);
-		else
-			if (TestSetPageLocked(page))
-				goto next;
-
-		/*
-		 * Only wait on writeback if we have already done a pass where
-		 * we we may have triggered writeouts for lots of pages.
-		 */
-		if (pass > 0) {
-			wait_on_page_writeback(page);
-		} else {
-			if (PageWriteback(page))
-				goto unlock_page;
-		}
-
-		/*
-		 * Anonymous pages must have swap cache references otherwise
-		 * the information contained in the page maps cannot be
-		 * preserved.
-		 */
-		if (PageAnon(page) && !PageSwapCache(page)) {
-			if (!add_to_swap(page, GFP_KERNEL)) {
-				rc = -ENOMEM;
-				goto unlock_page;
-			}
-		}
-
-		if (!to) {
-			rc = swap_page(page);
-			goto next;
-		}
-
-		newpage = lru_to_page(to);
-		lock_page(newpage);
-
-		/*
-		 * Pages are properly locked and writeback is complete.
-		 * Try to migrate the page.
-		 */
-		mapping = page_mapping(page);
-		if (!mapping)
-			goto unlock_both;
-
-		if (mapping->a_ops->migratepage) {
-			/*
-			 * Most pages have a mapping and most filesystems
-			 * should provide a migration function. Anonymous
-			 * pages are part of swap space which also has its
-			 * own migration function. This is the most common
-			 * path for page migration.
-			 */
-			rc = mapping->a_ops->migratepage(newpage, page);
-			goto unlock_both;
-		}
-
-		/*
-		 * Default handling if a filesystem does not provide
-		 * a migration function. We can only migrate clean
-		 * pages so try to write out any dirty pages first.
-		 */
-		if (PageDirty(page)) {
-			switch (pageout(page, mapping)) {
-			case PAGE_KEEP:
-			case PAGE_ACTIVATE:
-				goto unlock_both;
-
-			case PAGE_SUCCESS:
-				unlock_page(newpage);
-				goto next;
-
-			case PAGE_CLEAN:
-				; /* try to migrate the page below */
-			}
-		}
-
-		/*
-		 * Buffers are managed in a filesystem specific way.
-		 * We must have no buffers or drop them.
-		 */
-		if (!page_has_buffers(page) ||
-		    try_to_release_page(page, GFP_KERNEL)) {
-			rc = migrate_page(newpage, page);
-			goto unlock_both;
-		}
-
-		/*
-		 * On early passes with mapped pages simply
-		 * retry. There may be a lock held for some
-		 * buffers that may go away. Later
-		 * swap them out.
-		 */
-		if (pass > 4) {
-			/*
-			 * Persistently unable to drop buffers..... As a
-			 * measure of last resort we fall back to
-			 * swap_page().
-			 */
-			unlock_page(newpage);
-			newpage = NULL;
-			rc = swap_page(page);
-			goto next;
-		}
-
-unlock_both:
-		unlock_page(newpage);
-
-unlock_page:
-		unlock_page(page);
-
-next:
-		if (rc == -EAGAIN) {
-			retry++;
-		} else if (rc) {
-			/* Permanent failure */
-			list_move(&page->lru, failed);
-			nr_failed++;
-		} else {
-			if (newpage) {
-				/* Successful migration. Return page to LRU */
-				move_to_lru(newpage);
-			}
-			list_move(&page->lru, moved);
-		}
-	}
-	if (retry && pass++ < 10)
-		goto redo;
-
-	if (!swapwrite)
-		current->flags &= ~PF_SWAPWRITE;
-
-	return nr_failed + retry;
-}
-
-/*
- * Isolate one page from the LRU lists and put it on the
- * indicated list with elevated refcount.
- *
- * Result:
- *  0 = page not on LRU list
- *  1 = page removed from LRU list and added to the specified list.
- */
-int isolate_lru_page(struct page *page)
-{
-	int ret = 0;
-
-	if (PageLRU(page)) {
-		struct zone *zone = page_zone(page);
-		spin_lock_irq(&zone->lru_lock);
-		if (PageLRU(page)) {
-			ret = 1;
-			get_page(page);
-			ClearPageLRU(page);
-			if (PageActive(page))
-				del_page_from_active_list(zone, page);
-			else
-				del_page_from_inactive_list(zone, page);
-		}
-		spin_unlock_irq(&zone->lru_lock);
-	}
-
-	return ret;
-}
-#endif
-
 /*
  * zone->lru_lock is heavily contended. Some of the functions that
  * shrink the lists perform better by taking out a batch of pages