Diffstat (limited to 'mm/migrate.c')
-rw-r--r--  mm/migrate.c | 232
1 file changed, 214 insertions(+), 18 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 38e7cad782f4..55dbc45880c6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -32,6 +32,7 @@
 #include <linux/security.h>
 #include <linux/memcontrol.h>
 #include <linux/syscalls.h>
+#include <linux/hugetlb.h>
 #include <linux/gfp.h>
 
 #include "internal.h"
@@ -95,26 +96,34 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte_t *ptep, pte;
 	spinlock_t *ptl;
 
-	pgd = pgd_offset(mm, addr);
-	if (!pgd_present(*pgd))
-		goto out;
+	if (unlikely(PageHuge(new))) {
+		ptep = huge_pte_offset(mm, addr);
+		if (!ptep)
+			goto out;
+		ptl = &mm->page_table_lock;
+	} else {
+		pgd = pgd_offset(mm, addr);
+		if (!pgd_present(*pgd))
+			goto out;
 
-	pud = pud_offset(pgd, addr);
-	if (!pud_present(*pud))
-		goto out;
+		pud = pud_offset(pgd, addr);
+		if (!pud_present(*pud))
+			goto out;
 
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
-		goto out;
+		pmd = pmd_offset(pud, addr);
+		if (!pmd_present(*pmd))
+			goto out;
 
-	ptep = pte_offset_map(pmd, addr);
+		ptep = pte_offset_map(pmd, addr);
 
-	if (!is_swap_pte(*ptep)) {
-		pte_unmap(ptep);
-		goto out;
-	}
+		if (!is_swap_pte(*ptep)) {
+			pte_unmap(ptep);
+			goto out;
+		}
+
+		ptl = pte_lockptr(mm, pmd);
+	}
 
-	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	pte = *ptep;
 	if (!is_swap_pte(pte))
@@ -130,10 +139,17 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
 		pte = pte_mkwrite(pte);
+	if (PageHuge(new))
+		pte = pte_mkhuge(pte);
 	flush_cache_page(vma, addr, pte_pfn(pte));
 	set_pte_at(mm, addr, ptep, pte);
 
-	if (PageAnon(new))
+	if (PageHuge(new)) {
+		if (PageAnon(new))
+			hugepage_add_anon_rmap(new, vma, addr);
+		else
+			page_dup_rmap(new);
+	} else if (PageAnon(new))
 		page_add_anon_rmap(new, vma, addr);
 	else
 		page_add_file_rmap(new);
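For context, remove_migration_pte() is the restore side of a pair: the unmap path first replaces each mapping of the page with a migration swap entry, and the code above later turns that entry back into a real (now possibly huge) PTE pointing at the new page. Below is a minimal sketch of the install side, not part of this patch, assuming the existing swapops.h helpers and a caller that has already located ptep and holds the PTE lock; the real logic lives in try_to_unmap_one().

	/* Sketch only, not part of this diff. */
	static void install_migration_entry(struct mm_struct *mm, unsigned long addr,
					    pte_t *ptep, struct page *page, int write)
	{
		swp_entry_t entry = make_migration_entry(page, write);

		/* Faults on this address now see is_migration_entry() and wait. */
		set_pte_at(mm, addr, ptep, swp_entry_to_pte(entry));
	}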
@@ -276,11 +292,59 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 }
 
 /*
+ * The expected number of remaining references is the same as that
+ * of migrate_page_move_mapping().
+ */
+int migrate_huge_page_move_mapping(struct address_space *mapping,
+				   struct page *newpage, struct page *page)
+{
+	int expected_count;
+	void **pslot;
+
+	if (!mapping) {
+		if (page_count(page) != 1)
+			return -EAGAIN;
+		return 0;
+	}
+
+	spin_lock_irq(&mapping->tree_lock);
+
+	pslot = radix_tree_lookup_slot(&mapping->page_tree,
+					page_index(page));
+
+	expected_count = 2 + page_has_private(page);
+	if (page_count(page) != expected_count ||
+	    (struct page *)radix_tree_deref_slot(pslot) != page) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	if (!page_freeze_refs(page, expected_count)) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	get_page(newpage);
+
+	radix_tree_replace_slot(pslot, newpage);
+
+	page_unfreeze_refs(page, expected_count);
+
+	__put_page(page);
+
+	spin_unlock_irq(&mapping->tree_lock);
+	return 0;
+}
+
+/*
  * Copy the page to its new location
  */
-static void migrate_page_copy(struct page *newpage, struct page *page)
+void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	copy_highpage(newpage, page);
+	if (PageHuge(page))
+		copy_huge_page(newpage, page);
+	else
+		copy_highpage(newpage, page);
 
 	if (PageError(page))
 		SetPageError(newpage);
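The new migrate_huge_page_move_mapping(), together with the now non-static migrate_page_copy(), gives a filesystem what it needs to implement ->migratepage() for hugepages. A hedged sketch of such a callback follows; the actual hugetlbfs hook is added by a companion patch and is not part of this diff.

	static int hugetlbfs_migrate_page(struct address_space *mapping,
					  struct page *newpage, struct page *page)
	{
		int rc;

		/* Swap the radix-tree slot and transfer the mapping's reference. */
		rc = migrate_huge_page_move_mapping(mapping, newpage, page);
		if (rc)
			return rc;

		/* Copy the contents and page state to the new page. */
		migrate_page_copy(newpage, page);
		return 0;
	}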
@@ -724,6 +788,92 @@ move_newpage:
 }
 
 /*
+ * Counterpart of unmap_and_move_page() for hugepage migration.
+ *
+ * This function doesn't wait the completion of hugepage I/O
+ * because there is no race between I/O and migration for hugepage.
+ * Note that currently hugepage I/O occurs only in direct I/O
+ * where no lock is held and PG_writeback is irrelevant,
+ * and writeback status of all subpages are counted in the reference
+ * count of the head page (i.e. if all subpages of a 2MB hugepage are
+ * under direct I/O, the reference of the head page is 512 and a bit more.)
+ * This means that when we try to migrate hugepage whose subpages are
+ * doing direct I/O, some references remain after try_to_unmap() and
+ * hugepage migration fails without data corruption.
+ *
+ * There is also no race when direct I/O is issued on the page under migration,
+ * because then pte is replaced with migration swap entry and direct I/O code
+ * will wait in the page fault for migration to complete.
+ */
+static int unmap_and_move_huge_page(new_page_t get_new_page,
+				unsigned long private, struct page *hpage,
+				int force, int offlining)
+{
+	int rc = 0;
+	int *result = NULL;
+	struct page *new_hpage = get_new_page(hpage, private, &result);
+	int rcu_locked = 0;
+	struct anon_vma *anon_vma = NULL;
+
+	if (!new_hpage)
+		return -ENOMEM;
+
+	rc = -EAGAIN;
+
+	if (!trylock_page(hpage)) {
+		if (!force)
+			goto out;
+		lock_page(hpage);
+	}
+
+	if (PageAnon(hpage)) {
+		rcu_read_lock();
+		rcu_locked = 1;
+
+		if (page_mapped(hpage)) {
+			anon_vma = page_anon_vma(hpage);
+			atomic_inc(&anon_vma->external_refcount);
+		}
+	}
+
+	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+
+	if (!page_mapped(hpage))
+		rc = move_to_new_page(new_hpage, hpage, 1);
+
+	if (rc)
+		remove_migration_ptes(hpage, hpage);
+
+	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount,
+					    &anon_vma->lock)) {
+		int empty = list_empty(&anon_vma->head);
+		spin_unlock(&anon_vma->lock);
+		if (empty)
+			anon_vma_free(anon_vma);
+	}
+
+	if (rcu_locked)
+		rcu_read_unlock();
+out:
+	unlock_page(hpage);
+
+	if (rc != -EAGAIN) {
+		list_del(&hpage->lru);
+		put_page(hpage);
+	}
+
+	put_page(new_hpage);
+
+	if (result) {
+		if (rc)
+			*result = rc;
+		else
+			*result = page_to_nid(new_hpage);
+	}
+	return rc;
+}
+
+/*
  * migrate_pages
  *
  * The function takes one list of pages to migrate and a function
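To make the direct-I/O refcount argument in unmap_and_move_huge_page()'s comment concrete: a 2MB hugepage on x86_64 spans 2MB / 4KB = 512 base pages, and a direct-I/O pin on any of them is accounted against the head page, so with all subpages under direct I/O the head's refcount is at least 512 plus the usual baseline references. The refcount check on the hugepage (expected_count in migrate_huge_page_move_mapping(), or a bare count of 1 for an unmapped anonymous hugepage) then fails, and migration simply backs off with -EAGAIN instead of racing with the in-flight I/O.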
@@ -788,6 +938,52 @@ out:
 	return nr_failed + retry;
 }
 
+int migrate_huge_pages(struct list_head *from,
+		new_page_t get_new_page, unsigned long private, int offlining)
+{
+	int retry = 1;
+	int nr_failed = 0;
+	int pass = 0;
+	struct page *page;
+	struct page *page2;
+	int rc;
+
+	for (pass = 0; pass < 10 && retry; pass++) {
+		retry = 0;
+
+		list_for_each_entry_safe(page, page2, from, lru) {
+			cond_resched();
+
+			rc = unmap_and_move_huge_page(get_new_page,
+					private, page, pass > 2, offlining);
+
+			switch(rc) {
+			case -ENOMEM:
+				goto out;
+			case -EAGAIN:
+				retry++;
+				break;
+			case 0:
+				break;
+			default:
+				/* Permanent failure */
+				nr_failed++;
+				break;
+			}
+		}
+	}
+	rc = 0;
+out:
+
+	list_for_each_entry_safe(page, page2, from, lru)
+		put_page(page);
+
+	if (rc)
+		return rc;
+
+	return nr_failed + retry;
+}
+
 #ifdef CONFIG_NUMA
 /*
  * Move a list of individual pages
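Finally, a hedged sketch of how a caller might drive migrate_huge_pages(). The allocator callback below is an illustrative assumption: the intended user (the memory-failure soft-offline path) and the alloc_huge_page_node()/page_hstate() helpers it relies on come from companion patches in this series, not from this diff.

	/* Assumed helper from the companion hugetlb patches. */
	static struct page *new_hpage(struct page *hpage, unsigned long private,
				      int **result)
	{
		return alloc_huge_page_node(page_hstate(compound_head(hpage)),
					    page_to_nid(hpage));
	}

	static int migrate_one_hugepage(struct page *hpage)
	{
		LIST_HEAD(pagelist);

		/* migrate_huge_pages() drops this reference via put_page(). */
		get_page(hpage);
		list_add(&hpage->lru, &pagelist);

		/* offlining == 0: not called from memory hot-remove. */
		return migrate_huge_pages(&pagelist, new_hpage, 0, 0);
	}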