aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author Christoph Lameter <clameter@sgi.com> 2006-02-01 06:05:38 -0500
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-02-01 11:53:16 -0500
commit a48d07afdf18212de22b959715b16793c5a6e57a (patch)
tree 36d5963c29ceb5c2f6df53036cef5c0d30383dbf /mm
parent b16664e44c54525be89dc07ad15a13b4eeec5634 (diff)
[PATCH] Direct Migration V9: migrate_pages() extension
Add direct migration support with fallback to swap. This patch adds direct migration support on top of the swap based page migration facility. This allows the direct migration of anonymous pages and the migration of file backed pages by dropping the associated buffers (requires writeout). Fall back to swap out if necessary. The patch is based on lots of patches from the hotplug project but the code was restructured, documented and simplified as much as possible. Note that an additional patch that defines the migrate_page() method for filesystems is necessary in order to avoid writeback for anonymous and file backed pages. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Mike Kravetz <kravetz@us.ibm.com> Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/rmap.c 21
-rw-r--r-- mm/vmscan.c 226
2 files changed, 227 insertions, 20 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index d85a99d28c0..13fad5fcdf7 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -52,6 +52,7 @@
52#include <linux/init.h> 52#include <linux/init.h>
53#include <linux/rmap.h> 53#include <linux/rmap.h>
54#include <linux/rcupdate.h> 54#include <linux/rcupdate.h>
55#include <linux/module.h>
55 56
56#include <asm/tlbflush.h> 57#include <asm/tlbflush.h>
57 58
@@ -541,7 +542,8 @@ void page_remove_rmap(struct page *page)
541 * Subfunctions of try_to_unmap: try_to_unmap_one called 542 * Subfunctions of try_to_unmap: try_to_unmap_one called
542 * repeatedly from either try_to_unmap_anon or try_to_unmap_file. 543 * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
543 */ 544 */
544static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma) 545static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
546 int ignore_refs)
545{ 547{
546 struct mm_struct *mm = vma->vm_mm; 548 struct mm_struct *mm = vma->vm_mm;
547 unsigned long address; 549 unsigned long address;
@@ -564,7 +566,8 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
564 * skipped over this mm) then we should reactivate it. 566 * skipped over this mm) then we should reactivate it.
565 */ 567 */
566 if ((vma->vm_flags & VM_LOCKED) || 568 if ((vma->vm_flags & VM_LOCKED) ||
567 ptep_clear_flush_young(vma, address, pte)) { 569 (ptep_clear_flush_young(vma, address, pte)
570 && !ignore_refs)) {
568 ret = SWAP_FAIL; 571 ret = SWAP_FAIL;
569 goto out_unmap; 572 goto out_unmap;
570 } 573 }
@@ -698,7 +701,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
698 pte_unmap_unlock(pte - 1, ptl); 701 pte_unmap_unlock(pte - 1, ptl);
699} 702}
700 703
701static int try_to_unmap_anon(struct page *page) 704static int try_to_unmap_anon(struct page *page, int ignore_refs)
702{ 705{
703 struct anon_vma *anon_vma; 706 struct anon_vma *anon_vma;
704 struct vm_area_struct *vma; 707 struct vm_area_struct *vma;
@@ -709,7 +712,7 @@ static int try_to_unmap_anon(struct page *page)
709 return ret; 712 return ret;
710 713
711 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { 714 list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
712 ret = try_to_unmap_one(page, vma); 715 ret = try_to_unmap_one(page, vma, ignore_refs);
713 if (ret == SWAP_FAIL || !page_mapped(page)) 716 if (ret == SWAP_FAIL || !page_mapped(page))
714 break; 717 break;
715 } 718 }
@@ -726,7 +729,7 @@ static int try_to_unmap_anon(struct page *page)
726 * 729 *
727 * This function is only called from try_to_unmap for object-based pages. 730 * This function is only called from try_to_unmap for object-based pages.
728 */ 731 */
729static int try_to_unmap_file(struct page *page) 732static int try_to_unmap_file(struct page *page, int ignore_refs)
730{ 733{
731 struct address_space *mapping = page->mapping; 734 struct address_space *mapping = page->mapping;
732 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); 735 pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
@@ -740,7 +743,7 @@ static int try_to_unmap_file(struct page *page)
740 743
741 spin_lock(&mapping->i_mmap_lock); 744 spin_lock(&mapping->i_mmap_lock);
742 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 745 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
743 ret = try_to_unmap_one(page, vma); 746 ret = try_to_unmap_one(page, vma, ignore_refs);
744 if (ret == SWAP_FAIL || !page_mapped(page)) 747 if (ret == SWAP_FAIL || !page_mapped(page))
745 goto out; 748 goto out;
746 } 749 }
@@ -825,16 +828,16 @@ out:
825 * SWAP_AGAIN - we missed a mapping, try again later 828 * SWAP_AGAIN - we missed a mapping, try again later
826 * SWAP_FAIL - the page is unswappable 829 * SWAP_FAIL - the page is unswappable
827 */ 830 */
828int try_to_unmap(struct page *page) 831int try_to_unmap(struct page *page, int ignore_refs)
829{ 832{
830 int ret; 833 int ret;
831 834
832 BUG_ON(!PageLocked(page)); 835 BUG_ON(!PageLocked(page));
833 836
834 if (PageAnon(page)) 837 if (PageAnon(page))
835 ret = try_to_unmap_anon(page); 838 ret = try_to_unmap_anon(page, ignore_refs);
836 else 839 else
837 ret = try_to_unmap_file(page); 840 ret = try_to_unmap_file(page, ignore_refs);
838 841
839 if (!page_mapped(page)) 842 if (!page_mapped(page))
840 ret = SWAP_SUCCESS; 843 ret = SWAP_SUCCESS;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index aa4b80dbe3a..8f326ce2b69 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -483,7 +483,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
483 if (!sc->may_swap) 483 if (!sc->may_swap)
484 goto keep_locked; 484 goto keep_locked;
485 485
486 switch (try_to_unmap(page)) { 486 switch (try_to_unmap(page, 0)) {
487 case SWAP_FAIL: 487 case SWAP_FAIL:
488 goto activate_locked; 488 goto activate_locked;
489 case SWAP_AGAIN: 489 case SWAP_AGAIN:
@@ -623,7 +623,7 @@ static int swap_page(struct page *page)
623 struct address_space *mapping = page_mapping(page); 623 struct address_space *mapping = page_mapping(page);
624 624
625 if (page_mapped(page) && mapping) 625 if (page_mapped(page) && mapping)
626 if (try_to_unmap(page) != SWAP_SUCCESS) 626 if (try_to_unmap(page, 0) != SWAP_SUCCESS)
627 goto unlock_retry; 627 goto unlock_retry;
628 628
629 if (PageDirty(page)) { 629 if (PageDirty(page)) {
@@ -659,6 +659,154 @@ unlock_retry:
659retry: 659retry:
660 return -EAGAIN; 660 return -EAGAIN;
661} 661}
662
663/*
664 * Page migration was first developed in the context of the memory hotplug
665 * project. The main authors of the migration code are:
666 *
667 * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
668 * Hirokazu Takahashi <taka@valinux.co.jp>
669 * Dave Hansen <haveblue@us.ibm.com>
670 * Christoph Lameter <clameter@sgi.com>
671 */
672
673/*
674 * Remove references for a page and establish the new page with the correct
675 * basic settings to be able to stop accesses to the page.
676 */
677static int migrate_page_remove_references(struct page *newpage,
678 struct page *page, int nr_refs)
679{
680 struct address_space *mapping = page_mapping(page);
681 struct page **radix_pointer;
682
683 /*
684 * Avoid doing any of the following work if the page count
685 * indicates that the page is in use or truncate has removed
686 * the page.
687 */
688 if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
689 return 1;
690
691 /*
692 * Establish swap ptes for anonymous pages or destroy pte
693 * maps for files.
694 *
695 * In order to reestablish file backed mappings the fault handlers
696 * will take the radix tree_lock which may then be used to stop
697 * processes from accessing this page until the new page is ready.
698 *
699 * A process accessing via a swap pte (an anonymous page) will take a
700 * page_lock on the old page which will block the process until the
701 * migration attempt is complete. At that time the PageSwapCache bit
702 * will be examined. If the page was migrated then the PageSwapCache
703 * bit will be clear and the operation to retrieve the page will be
704 * retried which will find the new page in the radix tree. Then a new
705 * direct mapping may be generated based on the radix tree contents.
706 *
707 * If the page was not migrated then the PageSwapCache bit
708 * is still set and the operation may continue.
709 */
710 try_to_unmap(page, 1);
711
712 /*
713 * Give up if we were unable to remove all mappings.
714 */
715 if (page_mapcount(page))
716 return 1;
717
718 write_lock_irq(&mapping->tree_lock);
719
720 radix_pointer = (struct page **)radix_tree_lookup_slot(
721 &mapping->page_tree,
722 page_index(page));
723
724 if (!page_mapping(page) || page_count(page) != nr_refs ||
725 *radix_pointer != page) {
726 write_unlock_irq(&mapping->tree_lock);
727 return 1;
728 }
729
730 /*
731 * Now we know that no one else is looking at the page.
732 *
733 * Certain minimal information about a page must be available
734 * in order for other subsystems to properly handle the page if they
735 * find it through the radix tree update before we are finished
736 * copying the page.
737 */
738 get_page(newpage);
739 newpage->index = page->index;
740 newpage->mapping = page->mapping;
741 if (PageSwapCache(page)) {
742 SetPageSwapCache(newpage);
743 set_page_private(newpage, page_private(page));
744 }
745
746 *radix_pointer = newpage;
747 __put_page(page);
748 write_unlock_irq(&mapping->tree_lock);
749
750 return 0;
751}
752
753/*
754 * Copy the page to its new location
755 */
756void migrate_page_copy(struct page *newpage, struct page *page)
757{
758 copy_highpage(newpage, page);
759
760 if (PageError(page))
761 SetPageError(newpage);
762 if (PageReferenced(page))
763 SetPageReferenced(newpage);
764 if (PageUptodate(page))
765 SetPageUptodate(newpage);
766 if (PageActive(page))
767 SetPageActive(newpage);
768 if (PageChecked(page))
769 SetPageChecked(newpage);
770 if (PageMappedToDisk(page))
771 SetPageMappedToDisk(newpage);
772
773 if (PageDirty(page)) {
774 clear_page_dirty_for_io(page);
775 set_page_dirty(newpage);
776 }
777
778 ClearPageSwapCache(page);
779 ClearPageActive(page);
780 ClearPagePrivate(page);
781 set_page_private(page, 0);
782 page->mapping = NULL;
783
784 /*
785 * If any waiters have accumulated on the new page then
786 * wake them up.
787 */
788 if (PageWriteback(newpage))
789 end_page_writeback(newpage);
790}
791
792/*
793 * Common logic to directly migrate a single page suitable for
794 * pages that do not use PagePrivate.
795 *
796 * Pages are locked upon entry and exit.
797 */
798int migrate_page(struct page *newpage, struct page *page)
799{
800 BUG_ON(PageWriteback(page)); /* Writeback must be complete */
801
802 if (migrate_page_remove_references(newpage, page, 2))
803 return -EAGAIN;
804
805 migrate_page_copy(newpage, page);
806
807 return 0;
808}
809
662/* 810/*
663 * migrate_pages 811 * migrate_pages
664 * 812 *
@@ -672,11 +820,6 @@ retry:
672 * are movable anymore because to has become empty 820 * are movable anymore because to has become empty
673 * or no retryable pages exist anymore. 821 * or no retryable pages exist anymore.
674 * 822 *
675 * SIMPLIFIED VERSION: This implementation of migrate_pages
676 * is only swapping out pages and never touches the second
677 * list. The direct migration patchset
678 * extends this function to avoid the use of swap.
679 *
680 * Return: Number of pages not migrated when "to" ran empty. 823 * Return: Number of pages not migrated when "to" ran empty.
681 */ 824 */
682int migrate_pages(struct list_head *from, struct list_head *to, 825int migrate_pages(struct list_head *from, struct list_head *to,
@@ -697,6 +840,9 @@ redo:
697 retry = 0; 840 retry = 0;
698 841
699 list_for_each_entry_safe(page, page2, from, lru) { 842 list_for_each_entry_safe(page, page2, from, lru) {
843 struct page *newpage = NULL;
844 struct address_space *mapping;
845
700 cond_resched(); 846 cond_resched();
701 847
702 rc = 0; 848 rc = 0;
@@ -704,6 +850,9 @@ redo:
704 /* page was freed from under us. So we are done. */ 850 /* page was freed from under us. So we are done. */
705 goto next; 851 goto next;
706 852
853 if (to && list_empty(to))
854 break;
855
707 /* 856 /*
708 * Skip locked pages during the first two passes to give the 857 * Skip locked pages during the first two passes to give the
709 * functions holding the lock time to release the page. Later we 858 * functions holding the lock time to release the page. Later we
@@ -740,12 +889,64 @@ redo:
740 } 889 }
741 } 890 }
742 891
892 if (!to) {
893 rc = swap_page(page);
894 goto next;
895 }
896
897 newpage = lru_to_page(to);
898 lock_page(newpage);
899
743 /* 900 /*
744 * Page is properly locked and writeback is complete. 901 * Pages are properly locked and writeback is complete.
745 * Try to migrate the page. 902 * Try to migrate the page.
746 */ 903 */
747 rc = swap_page(page); 904 mapping = page_mapping(page);
748 goto next; 905 if (!mapping)
906 goto unlock_both;
907
908 /*
909 * Trigger writeout if page is dirty
910 */
911 if (PageDirty(page)) {
912 switch (pageout(page, mapping)) {
913 case PAGE_KEEP:
914 case PAGE_ACTIVATE:
915 goto unlock_both;
916
917 case PAGE_SUCCESS:
918 unlock_page(newpage);
919 goto next;
920
921 case PAGE_CLEAN:
922 ; /* try to migrate the page below */
923 }
924 }
925 /*
926 * If we have no buffer or can release the buffer
927 * then do a simple migration.
928 */
929 if (!page_has_buffers(page) ||
930 try_to_release_page(page, GFP_KERNEL)) {
931 rc = migrate_page(newpage, page);
932 goto unlock_both;
933 }
934
935 /*
936 * On early passes with mapped pages simply
937 * retry. There may be a lock held for some
938 * buffers that may go away. Later
939 * swap them out.
940 */
941 if (pass > 4) {
942 unlock_page(newpage);
943 newpage = NULL;
944 rc = swap_page(page);
945 goto next;
946 }
947
948unlock_both:
949 unlock_page(newpage);
749 950
750unlock_page: 951unlock_page:
751 unlock_page(page); 952 unlock_page(page);
@@ -758,7 +959,10 @@ next:
758 list_move(&page->lru, failed); 959 list_move(&page->lru, failed);
759 nr_failed++; 960 nr_failed++;
760 } else { 961 } else {
761 /* Success */ 962 if (newpage) {
963 /* Successful migration. Return page to LRU */
964 move_to_lru(newpage);
965 }
762 list_move(&page->lru, moved); 966 list_move(&page->lru, moved);
763 } 967 }
764 } 968 }