Diffstat (limited to 'mm/migrate.c'):
 mm/migrate.c | 249 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 224 insertions(+), 25 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 38e7cad782f4..fe5a3c6a5426 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -32,6 +32,7 @@
 #include <linux/security.h>
 #include <linux/memcontrol.h>
 #include <linux/syscalls.h>
+#include <linux/hugetlb.h>
 #include <linux/gfp.h>
 
 #include "internal.h"
@@ -95,26 +96,34 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte_t *ptep, pte;
 	spinlock_t *ptl;
 
-	pgd = pgd_offset(mm, addr);
-	if (!pgd_present(*pgd))
-		goto out;
+	if (unlikely(PageHuge(new))) {
+		ptep = huge_pte_offset(mm, addr);
+		if (!ptep)
+			goto out;
+		ptl = &mm->page_table_lock;
+	} else {
+		pgd = pgd_offset(mm, addr);
+		if (!pgd_present(*pgd))
+			goto out;
 
-	pud = pud_offset(pgd, addr);
-	if (!pud_present(*pud))
-		goto out;
+		pud = pud_offset(pgd, addr);
+		if (!pud_present(*pud))
+			goto out;
 
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
-		goto out;
+		pmd = pmd_offset(pud, addr);
+		if (!pmd_present(*pmd))
+			goto out;
 
-	ptep = pte_offset_map(pmd, addr);
+		ptep = pte_offset_map(pmd, addr);
 
-	if (!is_swap_pte(*ptep)) {
-		pte_unmap(ptep);
-		goto out;
-	}
+		if (!is_swap_pte(*ptep)) {
+			pte_unmap(ptep);
+			goto out;
+		}
+
+		ptl = pte_lockptr(mm, pmd);
+	}
 
-	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	pte = *ptep;
 	if (!is_swap_pte(pte))
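
A note on the lock choice in this hunk: hugetlb PTEs are not covered by the per-PMD split page-table locks, so the hugepage path falls back to the coarse mm->page_table_lock. A condensed sketch of the selection (the huge_pte_lockptr() helper that later kernels use for this does not exist yet; that mention is an aside, not part of this patch):

	/* Sketch: which spinlock guards the PTE being restored. */
	if (PageHuge(new))
		ptl = &mm->page_table_lock;	/* hugetlb: mm-wide lock */
	else
		ptl = pte_lockptr(mm, pmd);	/* base page: split ptlock */
	spin_lock(ptl);
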
@@ -130,10 +139,19 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
 		pte = pte_mkwrite(pte);
+#ifdef CONFIG_HUGETLB_PAGE
+	if (PageHuge(new))
+		pte = pte_mkhuge(pte);
+#endif
 	flush_cache_page(vma, addr, pte_pfn(pte));
 	set_pte_at(mm, addr, ptep, pte);
 
-	if (PageAnon(new))
+	if (PageHuge(new)) {
+		if (PageAnon(new))
+			hugepage_add_anon_rmap(new, vma, addr);
+		else
+			page_dup_rmap(new);
+	} else if (PageAnon(new))
 		page_add_anon_rmap(new, vma, addr);
 	else
 		page_add_file_rmap(new);
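
Why page_dup_rmap() rather than page_add_file_rmap() for the file-backed hugepage case: page_add_file_rmap() also bumps NR_FILE_MAPPED and the memcg file-mapped statistic, neither of which hugetlb pages participate in. page_dup_rmap() takes only the mapcount reference; in this era's include/linux/rmap.h it is simply:

	static inline void page_dup_rmap(struct page *page)
	{
		atomic_inc(&page->_mapcount);
	}
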
@@ -276,11 +294,59 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 }
 
 /*
+ * The expected number of remaining references is the same as that
+ * of migrate_page_move_mapping().
+ */
+int migrate_huge_page_move_mapping(struct address_space *mapping,
+				   struct page *newpage, struct page *page)
+{
+	int expected_count;
+	void **pslot;
+
+	if (!mapping) {
+		if (page_count(page) != 1)
+			return -EAGAIN;
+		return 0;
+	}
+
+	spin_lock_irq(&mapping->tree_lock);
+
+	pslot = radix_tree_lookup_slot(&mapping->page_tree,
+					page_index(page));
+
+	expected_count = 2 + page_has_private(page);
+	if (page_count(page) != expected_count ||
+	    (struct page *)radix_tree_deref_slot(pslot) != page) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	if (!page_freeze_refs(page, expected_count)) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	get_page(newpage);
+
+	radix_tree_replace_slot(pslot, newpage);
+
+	page_unfreeze_refs(page, expected_count);
+
+	__put_page(page);
+
+	spin_unlock_irq(&mapping->tree_lock);
+	return 0;
+}
+
+/*
  * Copy the page to its new location
  */
-static void migrate_page_copy(struct page *newpage, struct page *page)
+void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	copy_highpage(newpage, page);
+	if (PageHuge(page))
+		copy_huge_page(newpage, page);
+	else
+		copy_highpage(newpage, page);
 
 	if (PageError(page))
 		SetPageError(newpage);
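
The reference-freezing protocol in migrate_huge_page_move_mapping() is worth spelling out. A sketch of the accounting done under tree_lock (the comments are mine, not from the patch):

	/*
	 * expected_count = 2 + page_has_private(page):
	 *   one reference held by the radix tree slot,
	 *   one held by the migration caller,
	 *   plus one if private data (buffers) is attached.
	 * Any further reference, e.g. in-flight direct I/O, makes
	 * page_freeze_refs() fail, so migration bails out cleanly.
	 */
	if (!page_freeze_refs(page, expected_count))
		return -EAGAIN;		/* a concurrent user exists */
	get_page(newpage);		/* the tree slot's new reference */
	radix_tree_replace_slot(pslot, newpage);
	page_unfreeze_refs(page, expected_count);
	__put_page(page);		/* drop the old slot's reference */
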
@@ -431,7 +497,6 @@ static int writeout(struct address_space *mapping, struct page *page)
 		.nr_to_write = 1,
 		.range_start = 0,
 		.range_end = LLONG_MAX,
-		.nonblocking = 1,
 		.for_reclaim = 1
 	};
 	int rc;
@@ -724,6 +789,92 @@ move_newpage:
 }
 
 /*
+ * Counterpart of unmap_and_move() for hugepage migration.
+ *
+ * This function doesn't wait for the completion of hugepage I/O
+ * because there is no race between I/O and migration for hugepage.
+ * Note that currently hugepage I/O occurs only in direct I/O
+ * where no lock is held and PG_writeback is irrelevant,
+ * and the writeback status of all subpages is counted in the reference
+ * count of the head page (i.e. if all subpages of a 2MB hugepage are
+ * under direct I/O, the reference of the head page is 512 and a bit more.)
+ * This means that when we try to migrate a hugepage whose subpages are
+ * doing direct I/O, some references remain after try_to_unmap() and
+ * hugepage migration fails without data corruption.
+ *
+ * There is also no race when direct I/O is issued on the page under migration,
+ * because then the pte is replaced with a migration swap entry and direct I/O
+ * will wait in the page fault for migration to complete.
+ */
+static int unmap_and_move_huge_page(new_page_t get_new_page,
+				unsigned long private, struct page *hpage,
+				int force, int offlining)
+{
+	int rc = 0;
+	int *result = NULL;
+	struct page *new_hpage = get_new_page(hpage, private, &result);
+	int rcu_locked = 0;
+	struct anon_vma *anon_vma = NULL;
+
+	if (!new_hpage)
+		return -ENOMEM;
+
+	rc = -EAGAIN;
+
+	if (!trylock_page(hpage)) {
+		if (!force)
+			goto out;
+		lock_page(hpage);
+	}
+
+	if (PageAnon(hpage)) {
+		rcu_read_lock();
+		rcu_locked = 1;
+
+		if (page_mapped(hpage)) {
+			anon_vma = page_anon_vma(hpage);
+			atomic_inc(&anon_vma->external_refcount);
+		}
+	}
+
+	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+
+	if (!page_mapped(hpage))
+		rc = move_to_new_page(new_hpage, hpage, 1);
+
+	if (rc)
+		remove_migration_ptes(hpage, hpage);
+
+	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount,
+					    &anon_vma->lock)) {
+		int empty = list_empty(&anon_vma->head);
+		spin_unlock(&anon_vma->lock);
+		if (empty)
+			anon_vma_free(anon_vma);
+	}
+
+	if (rcu_locked)
+		rcu_read_unlock();
+out:
+	unlock_page(hpage);
+
+	if (rc != -EAGAIN) {
+		list_del(&hpage->lru);
+		put_page(hpage);
+	}
+
+	put_page(new_hpage);
+
+	if (result) {
+		if (rc)
+			*result = rc;
+		else
+			*result = page_to_nid(new_hpage);
+	}
+	return rc;
+}
+
+/*
  * migrate_pages
  *
  * The function takes one list of pages to migrate and a function
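
One pattern above deserves a remark: pinning the anon_vma through external_refcount keeps it alive across the unmap/remap window even if the last mapping task exits in between; unmap_and_move() uses the same trick for base pages. Condensed (a sketch, not the exact hunk):

	anon_vma = page_anon_vma(hpage);
	atomic_inc(&anon_vma->external_refcount);	/* pin */
	/* ... try_to_unmap(), move_to_new_page(), remove_migration_ptes() ... */
	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount,
					    &anon_vma->lock)) {
		int empty = list_empty(&anon_vma->head);/* last pin gone */
		spin_unlock(&anon_vma->lock);
		if (empty)
			anon_vma_free(anon_vma);	/* nobody else needs it */
	}
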
@@ -732,8 +883,9 @@ move_newpage:
  *
  * The function returns after 10 attempts or if no pages
  * are movable anymore because to has become empty
- * or no retryable pages exist anymore. All pages will be
- * returned to the LRU or freed.
+ * or no retryable pages exist anymore.
+ * Caller should call putback_lru_pages to return pages to the LRU
+ * or free list.
  *
  * Return: Number of pages not migrated or error code.
  */
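
This comment change reflects a real API change: migrate_pages() no longer puts failed pages back on the LRU itself, so every caller must now do so, as the do_move_page_to_node_array() hunk further down shows. The new calling convention in short:

	LIST_HEAD(pagelist);
	/* ... isolate candidate pages onto pagelist ... */
	err = migrate_pages(&pagelist, new_page_node, private, 0);
	if (err)
		/* some pages were not migrated: caller returns them */
		putback_lru_pages(&pagelist);
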
@@ -780,7 +932,51 @@ out:
 	if (!swapwrite)
 		current->flags &= ~PF_SWAPWRITE;
 
-	putback_lru_pages(from);
+	if (rc)
+		return rc;
+
+	return nr_failed + retry;
+}
+
+int migrate_huge_pages(struct list_head *from,
+		new_page_t get_new_page, unsigned long private, int offlining)
+{
+	int retry = 1;
+	int nr_failed = 0;
+	int pass = 0;
+	struct page *page;
+	struct page *page2;
+	int rc;
+
+	for (pass = 0; pass < 10 && retry; pass++) {
+		retry = 0;
+
+		list_for_each_entry_safe(page, page2, from, lru) {
+			cond_resched();
+
+			rc = unmap_and_move_huge_page(get_new_page,
+					private, page, pass > 2, offlining);
+
+			switch(rc) {
+			case -ENOMEM:
+				goto out;
+			case -EAGAIN:
+				retry++;
+				break;
+			case 0:
+				break;
+			default:
+				/* Permanent failure */
+				nr_failed++;
+				break;
+			}
+		}
+	}
+	rc = 0;
+out:
+
+	list_for_each_entry_safe(page, page2, from, lru)
+		put_page(page);
 
 	if (rc)
 		return rc;
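
A hedged usage sketch for migrate_huge_pages(): the callback and call site below are illustrative assumptions, not code from this patch (soft offline in memory-failure.c, added elsewhere in this series, does something similar using alloc_huge_page_node()):

	/* Hypothetical caller: migrate one hugepage within its node. */
	static struct page *new_hpage(struct page *hpage, unsigned long private,
				      int **result)
	{
		return alloc_huge_page_node(page_hstate(compound_head(hpage)),
					    page_to_nid(hpage));
	}

	LIST_HEAD(hlist);
	get_page(hpage);		/* hold it while on the list */
	list_add(&hpage->lru, &hlist);
	ret = migrate_huge_pages(&hlist, new_hpage, 0, 0);
	/* on return, every list entry has been put_page()'d, either in
	 * unmap_and_move_huge_page() or in the final cleanup loop */
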
@@ -841,7 +1037,7 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 
 	err = -EFAULT;
 	vma = find_vma(mm, pp->addr);
-	if (!vma || !vma_migratable(vma))
+	if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
 		goto set_status;
 
 	page = follow_page(vma, pp->addr, FOLL_GET);
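
The added vm_start check closes a subtle hole: find_vma() returns the first VMA whose vm_end lies above the address, and that VMA may itself start above the address, i.e. the address can sit in an unmapped gap. The same guard is added to do_pages_stat_array() below. Sketch of the failure mode being rejected:

	/* layout:  [vma A: 0x1000-0x2000)   gap   [vma B: 0x5000-0x6000) */
	vma = find_vma(mm, 0x3000);	/* returns vma B, not NULL */
	/* 0x3000 is unmapped, so the lookup must also check: */
	if (!vma || addr < vma->vm_start)
		goto set_status;	/* report -EFAULT for this page */
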
@@ -890,9 +1086,12 @@ set_status:
 	}
 
 	err = 0;
-	if (!list_empty(&pagelist))
+	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_page_node,
 				(unsigned long)pm, 0);
+		if (err)
+			putback_lru_pages(&pagelist);
+	}
 
 	up_read(&mm->mmap_sem);
 	return err;
@@ -1005,7 +1204,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
 	int err = -EFAULT;
 
 	vma = find_vma(mm, addr);
-	if (!vma)
+	if (!vma || addr < vma->vm_start)
 		goto set_status;
 
 	page = follow_page(vma, addr, 0);