diff options
-rw-r--r-- | fs/hugetlbfs/inode.c | 15 | ||||
-rw-r--r-- | include/linux/migrate.h | 16 | ||||
-rw-r--r-- | mm/hugetlb.c | 18 | ||||
-rw-r--r-- | mm/migrate.c | 232 |
4 files changed, 262 insertions, 19 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6e5bd42f3860..1f7ca505d48e 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/statfs.h> | 31 | #include <linux/statfs.h> |
32 | #include <linux/security.h> | 32 | #include <linux/security.h> |
33 | #include <linux/magic.h> | 33 | #include <linux/magic.h> |
34 | #include <linux/migrate.h> | ||
34 | 35 | ||
35 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
36 | 37 | ||
@@ -573,6 +574,19 @@ static int hugetlbfs_set_page_dirty(struct page *page) | |||
573 | return 0; | 574 | return 0; |
574 | } | 575 | } |
575 | 576 | ||
577 | static int hugetlbfs_migrate_page(struct address_space *mapping, | ||
578 | struct page *newpage, struct page *page) | ||
579 | { | ||
580 | int rc; | ||
581 | |||
582 | rc = migrate_huge_page_move_mapping(mapping, newpage, page); | ||
583 | if (rc) | ||
584 | return rc; | ||
585 | migrate_page_copy(newpage, page); | ||
586 | |||
587 | return 0; | ||
588 | } | ||
589 | |||
576 | static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 590 | static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
577 | { | 591 | { |
578 | struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); | 592 | struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); |
@@ -659,6 +673,7 @@ static const struct address_space_operations hugetlbfs_aops = { | |||
659 | .write_begin = hugetlbfs_write_begin, | 673 | .write_begin = hugetlbfs_write_begin, |
660 | .write_end = hugetlbfs_write_end, | 674 | .write_end = hugetlbfs_write_end, |
661 | .set_page_dirty = hugetlbfs_set_page_dirty, | 675 | .set_page_dirty = hugetlbfs_set_page_dirty, |
676 | .migratepage = hugetlbfs_migrate_page, | ||
662 | }; | 677 | }; |
663 | 678 | ||
664 | 679 | ||
diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 7238231b8dd4..3c1941e40e61 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h | |||
@@ -14,6 +14,8 @@ extern int migrate_page(struct address_space *, | |||
14 | struct page *, struct page *); | 14 | struct page *, struct page *); |
15 | extern int migrate_pages(struct list_head *l, new_page_t x, | 15 | extern int migrate_pages(struct list_head *l, new_page_t x, |
16 | unsigned long private, int offlining); | 16 | unsigned long private, int offlining); |
17 | extern int migrate_huge_pages(struct list_head *l, new_page_t x, | ||
18 | unsigned long private, int offlining); | ||
17 | 19 | ||
18 | extern int fail_migrate_page(struct address_space *, | 20 | extern int fail_migrate_page(struct address_space *, |
19 | struct page *, struct page *); | 21 | struct page *, struct page *); |
@@ -23,12 +25,17 @@ extern int migrate_prep_local(void); | |||
23 | extern int migrate_vmas(struct mm_struct *mm, | 25 | extern int migrate_vmas(struct mm_struct *mm, |
24 | const nodemask_t *from, const nodemask_t *to, | 26 | const nodemask_t *from, const nodemask_t *to, |
25 | unsigned long flags); | 27 | unsigned long flags); |
28 | extern void migrate_page_copy(struct page *newpage, struct page *page); | ||
29 | extern int migrate_huge_page_move_mapping(struct address_space *mapping, | ||
30 | struct page *newpage, struct page *page); | ||
26 | #else | 31 | #else |
27 | #define PAGE_MIGRATION 0 | 32 | #define PAGE_MIGRATION 0 |
28 | 33 | ||
29 | static inline void putback_lru_pages(struct list_head *l) {} | 34 | static inline void putback_lru_pages(struct list_head *l) {} |
30 | static inline int migrate_pages(struct list_head *l, new_page_t x, | 35 | static inline int migrate_pages(struct list_head *l, new_page_t x, |
31 | unsigned long private, int offlining) { return -ENOSYS; } | 36 | unsigned long private, int offlining) { return -ENOSYS; } |
37 | static inline int migrate_huge_pages(struct list_head *l, new_page_t x, | ||
38 | unsigned long private, int offlining) { return -ENOSYS; } | ||
32 | 39 | ||
33 | static inline int migrate_prep(void) { return -ENOSYS; } | 40 | static inline int migrate_prep(void) { return -ENOSYS; } |
34 | static inline int migrate_prep_local(void) { return -ENOSYS; } | 41 | static inline int migrate_prep_local(void) { return -ENOSYS; } |
@@ -40,6 +47,15 @@ static inline int migrate_vmas(struct mm_struct *mm, | |||
40 | return -ENOSYS; | 47 | return -ENOSYS; |
41 | } | 48 | } |
42 | 49 | ||
50 | static inline void migrate_page_copy(struct page *newpage, | ||
51 | struct page *page) {} | ||
52 | |||
53 | static inline int migrate_huge_page_move_mapping(struct address_space *mapping, ||
54 | struct page *newpage, struct page *page) ||
55 | { ||
56 | return -ENOSYS; ||
57 | } ||
58 | |||
43 | /* Possible settings for the migrate_page() method in address_operations */ | 59 | /* Possible settings for the migrate_page() method in address_operations */ |
44 | #define migrate_page NULL | 60 | #define migrate_page NULL |
45 | #define fail_migrate_page NULL | 61 | #define fail_migrate_page NULL |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a73dbdcb89eb..0fa9de8361bd 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -2217,6 +2217,19 @@ nomem: | |||
2217 | return -ENOMEM; | 2217 | return -ENOMEM; |
2218 | } | 2218 | } |
2219 | 2219 | ||
2220 | static int is_hugetlb_entry_migration(pte_t pte) | ||
2221 | { | ||
2222 | swp_entry_t swp; | ||
2223 | |||
2224 | if (huge_pte_none(pte) || pte_present(pte)) | ||
2225 | return 0; | ||
2226 | swp = pte_to_swp_entry(pte); | ||
2227 | if (non_swap_entry(swp) && is_migration_entry(swp)) { | ||
2228 | return 1; | ||
2229 | } else | ||
2230 | return 0; | ||
2231 | } | ||
2232 | |||
2220 | static int is_hugetlb_entry_hwpoisoned(pte_t pte) | 2233 | static int is_hugetlb_entry_hwpoisoned(pte_t pte) |
2221 | { | 2234 | { |
2222 | swp_entry_t swp; | 2235 | swp_entry_t swp; |
@@ -2648,7 +2661,10 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2648 | ptep = huge_pte_offset(mm, address); | 2661 | ptep = huge_pte_offset(mm, address); |
2649 | if (ptep) { | 2662 | if (ptep) { |
2650 | entry = huge_ptep_get(ptep); | 2663 | entry = huge_ptep_get(ptep); |
2651 | if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) | 2664 | if (unlikely(is_hugetlb_entry_migration(entry))) { |
2665 | migration_entry_wait(mm, (pmd_t *)ptep, address); | ||
2666 | return 0; | ||
2667 | } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) | ||
2652 | return VM_FAULT_HWPOISON; | 2668 | return VM_FAULT_HWPOISON; |
2653 | } | 2669 | } |
2654 | 2670 | ||
diff --git a/mm/migrate.c b/mm/migrate.c index 38e7cad782f4..55dbc45880c6 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <linux/security.h> | 32 | #include <linux/security.h> |
33 | #include <linux/memcontrol.h> | 33 | #include <linux/memcontrol.h> |
34 | #include <linux/syscalls.h> | 34 | #include <linux/syscalls.h> |
35 | #include <linux/hugetlb.h> | ||
35 | #include <linux/gfp.h> | 36 | #include <linux/gfp.h> |
36 | 37 | ||
37 | #include "internal.h" | 38 | #include "internal.h" |
@@ -95,26 +96,34 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, | |||
95 | pte_t *ptep, pte; | 96 | pte_t *ptep, pte; |
96 | spinlock_t *ptl; | 97 | spinlock_t *ptl; |
97 | 98 | ||
98 | pgd = pgd_offset(mm, addr); | 99 | if (unlikely(PageHuge(new))) { |
99 | if (!pgd_present(*pgd)) | 100 | ptep = huge_pte_offset(mm, addr); |
100 | goto out; | 101 | if (!ptep) |
102 | goto out; | ||
103 | ptl = &mm->page_table_lock; | ||
104 | } else { | ||
105 | pgd = pgd_offset(mm, addr); | ||
106 | if (!pgd_present(*pgd)) | ||
107 | goto out; | ||
101 | 108 | ||
102 | pud = pud_offset(pgd, addr); | 109 | pud = pud_offset(pgd, addr); |
103 | if (!pud_present(*pud)) | 110 | if (!pud_present(*pud)) |
104 | goto out; | 111 | goto out; |
105 | 112 | ||
106 | pmd = pmd_offset(pud, addr); | 113 | pmd = pmd_offset(pud, addr); |
107 | if (!pmd_present(*pmd)) | 114 | if (!pmd_present(*pmd)) |
108 | goto out; | 115 | goto out; |
109 | 116 | ||
110 | ptep = pte_offset_map(pmd, addr); | 117 | ptep = pte_offset_map(pmd, addr); |
111 | 118 | ||
112 | if (!is_swap_pte(*ptep)) { | 119 | if (!is_swap_pte(*ptep)) { |
113 | pte_unmap(ptep); | 120 | pte_unmap(ptep); |
114 | goto out; | 121 | goto out; |
115 | } | 122 | } |
123 | |||
124 | ptl = pte_lockptr(mm, pmd); | ||
125 | } | ||
116 | 126 | ||
117 | ptl = pte_lockptr(mm, pmd); | ||
118 | spin_lock(ptl); | 127 | spin_lock(ptl); |
119 | pte = *ptep; | 128 | pte = *ptep; |
120 | if (!is_swap_pte(pte)) | 129 | if (!is_swap_pte(pte)) |
@@ -130,10 +139,17 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, | |||
130 | pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); | 139 | pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); |
131 | if (is_write_migration_entry(entry)) | 140 | if (is_write_migration_entry(entry)) |
132 | pte = pte_mkwrite(pte); | 141 | pte = pte_mkwrite(pte); |
142 | if (PageHuge(new)) | ||
143 | pte = pte_mkhuge(pte); | ||
133 | flush_cache_page(vma, addr, pte_pfn(pte)); | 144 | flush_cache_page(vma, addr, pte_pfn(pte)); |
134 | set_pte_at(mm, addr, ptep, pte); | 145 | set_pte_at(mm, addr, ptep, pte); |
135 | 146 | ||
136 | if (PageAnon(new)) | 147 | if (PageHuge(new)) { |
148 | if (PageAnon(new)) | ||
149 | hugepage_add_anon_rmap(new, vma, addr); | ||
150 | else | ||
151 | page_dup_rmap(new); | ||
152 | } else if (PageAnon(new)) | ||
137 | page_add_anon_rmap(new, vma, addr); | 153 | page_add_anon_rmap(new, vma, addr); |
138 | else | 154 | else |
139 | page_add_file_rmap(new); | 155 | page_add_file_rmap(new); |
@@ -276,11 +292,59 @@ static int migrate_page_move_mapping(struct address_space *mapping, | |||
276 | } | 292 | } |
277 | 293 | ||
278 | /* | 294 | /* |
295 | * The expected number of remaining references is the same as that | ||
296 | * of migrate_page_move_mapping(). | ||
297 | */ | ||
298 | int migrate_huge_page_move_mapping(struct address_space *mapping, | ||
299 | struct page *newpage, struct page *page) | ||
300 | { | ||
301 | int expected_count; | ||
302 | void **pslot; | ||
303 | |||
304 | if (!mapping) { | ||
305 | if (page_count(page) != 1) | ||
306 | return -EAGAIN; | ||
307 | return 0; | ||
308 | } | ||
309 | |||
310 | spin_lock_irq(&mapping->tree_lock); | ||
311 | |||
312 | pslot = radix_tree_lookup_slot(&mapping->page_tree, | ||
313 | page_index(page)); | ||
314 | |||
315 | expected_count = 2 + page_has_private(page); | ||
316 | if (page_count(page) != expected_count || | ||
317 | (struct page *)radix_tree_deref_slot(pslot) != page) { | ||
318 | spin_unlock_irq(&mapping->tree_lock); | ||
319 | return -EAGAIN; | ||
320 | } | ||
321 | |||
322 | if (!page_freeze_refs(page, expected_count)) { | ||
323 | spin_unlock_irq(&mapping->tree_lock); | ||
324 | return -EAGAIN; | ||
325 | } | ||
326 | |||
327 | get_page(newpage); | ||
328 | |||
329 | radix_tree_replace_slot(pslot, newpage); | ||
330 | |||
331 | page_unfreeze_refs(page, expected_count); | ||
332 | |||
333 | __put_page(page); | ||
334 | |||
335 | spin_unlock_irq(&mapping->tree_lock); | ||
336 | return 0; | ||
337 | } | ||
338 | |||
339 | /* | ||
279 | * Copy the page to its new location | 340 | * Copy the page to its new location |
280 | */ | 341 | */ |
281 | static void migrate_page_copy(struct page *newpage, struct page *page) | 342 | void migrate_page_copy(struct page *newpage, struct page *page) |
282 | { | 343 | { |
283 | copy_highpage(newpage, page); | 344 | if (PageHuge(page)) |
345 | copy_huge_page(newpage, page); | ||
346 | else | ||
347 | copy_highpage(newpage, page); | ||
284 | 348 | ||
285 | if (PageError(page)) | 349 | if (PageError(page)) |
286 | SetPageError(newpage); | 350 | SetPageError(newpage); |
@@ -724,6 +788,92 @@ move_newpage: | |||
724 | } | 788 | } |
725 | 789 | ||
726 | /* | 790 | /* |
791 | * Counterpart of unmap_and_move() for hugepage migration. ||
792 | * | ||
793 | * This function doesn't wait for the completion of hugepage I/O ||
794 | * because there is no race between I/O and migration for hugepage. | ||
795 | * Note that currently hugepage I/O occurs only in direct I/O | ||
796 | * where no lock is held and PG_writeback is irrelevant, | ||
797 | * and writeback status of all subpages are counted in the reference | ||
798 | * count of the head page (i.e. if all subpages of a 2MB hugepage are | ||
799 | * under direct I/O, the reference of the head page is 512 and a bit more.) | ||
800 | * This means that when we try to migrate hugepage whose subpages are | ||
801 | * doing direct I/O, some references remain after try_to_unmap() and | ||
802 | * hugepage migration fails without data corruption. | ||
803 | * | ||
804 | * There is also no race when direct I/O is issued on the page under migration, | ||
805 | * because then pte is replaced with migration swap entry and direct I/O code | ||
806 | * will wait in the page fault for migration to complete. | ||
807 | */ | ||
808 | static int unmap_and_move_huge_page(new_page_t get_new_page, | ||
809 | unsigned long private, struct page *hpage, | ||
810 | int force, int offlining) | ||
811 | { | ||
812 | int rc = 0; | ||
813 | int *result = NULL; | ||
814 | struct page *new_hpage = get_new_page(hpage, private, &result); | ||
815 | int rcu_locked = 0; | ||
816 | struct anon_vma *anon_vma = NULL; | ||
817 | |||
818 | if (!new_hpage) | ||
819 | return -ENOMEM; | ||
820 | |||
821 | rc = -EAGAIN; | ||
822 | |||
823 | if (!trylock_page(hpage)) { | ||
824 | if (!force) | ||
825 | goto out; | ||
826 | lock_page(hpage); | ||
827 | } | ||
828 | |||
829 | if (PageAnon(hpage)) { | ||
830 | rcu_read_lock(); | ||
831 | rcu_locked = 1; | ||
832 | |||
833 | if (page_mapped(hpage)) { | ||
834 | anon_vma = page_anon_vma(hpage); | ||
835 | atomic_inc(&anon_vma->external_refcount); | ||
836 | } | ||
837 | } | ||
838 | |||
839 | try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); | ||
840 | |||
841 | if (!page_mapped(hpage)) | ||
842 | rc = move_to_new_page(new_hpage, hpage, 1); | ||
843 | |||
844 | if (rc) | ||
845 | remove_migration_ptes(hpage, hpage); | ||
846 | |||
847 | if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, | ||
848 | &anon_vma->lock)) { | ||
849 | int empty = list_empty(&anon_vma->head); | ||
850 | spin_unlock(&anon_vma->lock); | ||
851 | if (empty) | ||
852 | anon_vma_free(anon_vma); | ||
853 | } | ||
854 | |||
855 | if (rcu_locked) | ||
856 | rcu_read_unlock(); | ||
857 | out: | ||
858 | unlock_page(hpage); | ||
859 | |||
860 | if (rc != -EAGAIN) { | ||
861 | list_del(&hpage->lru); | ||
862 | put_page(hpage); | ||
863 | } | ||
864 | |||
865 | put_page(new_hpage); | ||
866 | |||
867 | if (result) { | ||
868 | if (rc) | ||
869 | *result = rc; | ||
870 | else | ||
871 | *result = page_to_nid(new_hpage); | ||
872 | } | ||
873 | return rc; | ||
874 | } | ||
875 | |||
876 | /* | ||
727 | * migrate_pages | 877 | * migrate_pages |
728 | * | 878 | * |
729 | * The function takes one list of pages to migrate and a function | 879 | * The function takes one list of pages to migrate and a function |
@@ -788,6 +938,52 @@ out: | |||
788 | return nr_failed + retry; | 938 | return nr_failed + retry; |
789 | } | 939 | } |
790 | 940 | ||
941 | int migrate_huge_pages(struct list_head *from, | ||
942 | new_page_t get_new_page, unsigned long private, int offlining) | ||
943 | { | ||
944 | int retry = 1; | ||
945 | int nr_failed = 0; | ||
946 | int pass = 0; | ||
947 | struct page *page; | ||
948 | struct page *page2; | ||
949 | int rc; | ||
950 | |||
951 | for (pass = 0; pass < 10 && retry; pass++) { | ||
952 | retry = 0; | ||
953 | |||
954 | list_for_each_entry_safe(page, page2, from, lru) { | ||
955 | cond_resched(); | ||
956 | |||
957 | rc = unmap_and_move_huge_page(get_new_page, | ||
958 | private, page, pass > 2, offlining); | ||
959 | |||
960 | switch(rc) { | ||
961 | case -ENOMEM: | ||
962 | goto out; | ||
963 | case -EAGAIN: | ||
964 | retry++; | ||
965 | break; | ||
966 | case 0: | ||
967 | break; | ||
968 | default: | ||
969 | /* Permanent failure */ | ||
970 | nr_failed++; | ||
971 | break; | ||
972 | } | ||
973 | } | ||
974 | } | ||
975 | rc = 0; | ||
976 | out: | ||
977 | |||
978 | list_for_each_entry_safe(page, page2, from, lru) | ||
979 | put_page(page); | ||
980 | |||
981 | if (rc) | ||
982 | return rc; | ||
983 | |||
984 | return nr_failed + retry; | ||
985 | } | ||
986 | |||
791 | #ifdef CONFIG_NUMA | 987 | #ifdef CONFIG_NUMA |
792 | /* | 988 | /* |
793 | * Move a list of individual pages | 989 | * Move a list of individual pages |