 fs/hugetlbfs/inode.c    |  15
 include/linux/migrate.h |  16
 mm/hugetlb.c            |  18
 mm/migrate.c            | 232
 4 files changed, 262 insertions(+), 19 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6e5bd42f3860..1f7ca505d48e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -31,6 +31,7 @@
 #include <linux/statfs.h>
 #include <linux/security.h>
 #include <linux/magic.h>
+#include <linux/migrate.h>
 
 #include <asm/uaccess.h>
 
@@ -573,6 +574,19 @@ static int hugetlbfs_set_page_dirty(struct page *page)
 	return 0;
 }
 
+static int hugetlbfs_migrate_page(struct address_space *mapping,
+				struct page *newpage, struct page *page)
+{
+	int rc;
+
+	rc = migrate_huge_page_move_mapping(mapping, newpage, page);
+	if (rc)
+		return rc;
+	migrate_page_copy(newpage, page);
+
+	return 0;
+}
+
 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
@@ -659,6 +673,7 @@ static const struct address_space_operations hugetlbfs_aops = {
 	.write_begin	= hugetlbfs_write_begin,
 	.write_end	= hugetlbfs_write_end,
 	.set_page_dirty	= hugetlbfs_set_page_dirty,
+	.migratepage	= hugetlbfs_migrate_page,
 };
 
 
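For context, the .migratepage hook added above is reached from the migration
core; a simplified sketch of the dispatch in move_to_new_page() (existing code
in mm/migrate.c of this era, trimmed here for illustration, not part of this
patch):

	mapping = page_mapping(page);
	if (!mapping)
		rc = migrate_page(mapping, newpage, page);
	else if (mapping->a_ops->migratepage)
		/* hugetlbfs pages now take this branch */
		rc = mapping->a_ops->migratepage(mapping, newpage, page);
	else
		rc = fallback_migrate_page(mapping, newpage, page);
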
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 7238231b8dd4..3c1941e40e61 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -14,6 +14,8 @@ extern int migrate_page(struct address_space *,
 			struct page *, struct page *);
 extern int migrate_pages(struct list_head *l, new_page_t x,
 			unsigned long private, int offlining);
+extern int migrate_huge_pages(struct list_head *l, new_page_t x,
+			unsigned long private, int offlining);
 
 extern int fail_migrate_page(struct address_space *,
 			struct page *, struct page *);
@@ -23,12 +25,17 @@ extern int migrate_prep_local(void);
 extern int migrate_vmas(struct mm_struct *mm,
 		const nodemask_t *from, const nodemask_t *to,
 		unsigned long flags);
+extern void migrate_page_copy(struct page *newpage, struct page *page);
+extern int migrate_huge_page_move_mapping(struct address_space *mapping,
+		struct page *newpage, struct page *page);
 #else
 #define PAGE_MIGRATION 0
 
 static inline void putback_lru_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
 		unsigned long private, int offlining) { return -ENOSYS; }
+static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
+		unsigned long private, int offlining) { return -ENOSYS; }
 
 static inline int migrate_prep(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }
@@ -40,6 +47,15 @@ static inline int migrate_vmas(struct mm_struct *mm,
 	return -ENOSYS;
 }
 
+static inline void migrate_page_copy(struct page *newpage,
+				     struct page *page) {}
+
+static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
+				  struct page *newpage, struct page *page)
+{
+	return -ENOSYS;
+}
+
 /* Possible settings for the migrate_page() method in address_operations */
 #define migrate_page NULL
 #define fail_migrate_page NULL
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a73dbdcb89eb..0fa9de8361bd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2217,6 +2217,19 @@ nomem:
 	return -ENOMEM;
 }
 
+static int is_hugetlb_entry_migration(pte_t pte)
+{
+	swp_entry_t swp;
+
+	if (huge_pte_none(pte) || pte_present(pte))
+		return 0;
+	swp = pte_to_swp_entry(pte);
+	if (non_swap_entry(swp) && is_migration_entry(swp))
+		return 1;
+	else
+		return 0;
+}
+
 static int is_hugetlb_entry_hwpoisoned(pte_t pte)
 {
 	swp_entry_t swp;
@@ -2648,7 +2661,10 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	ptep = huge_pte_offset(mm, address);
 	if (ptep) {
 		entry = huge_ptep_get(ptep);
-		if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
+		if (unlikely(is_hugetlb_entry_migration(entry))) {
+			migration_entry_wait(mm, (pmd_t *)ptep, address);
+			return 0;
+		} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
 			return VM_FAULT_HWPOISON;
 	}
 
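The migration entry that hugetlb_fault() now waits on is installed when the
hugepage is unmapped for migration; abridged from try_to_unmap_one() with
TTU_MIGRATION (existing code in mm/rmap.c of this era, shown for illustration):

	/* Establish migration entry for a page about to be moved. */
	entry = make_migration_entry(page, pte_write(pteval));
	set_pte_at(mm, address, pte, swp_entry_to_pte(entry));

Any fault on that PTE then sleeps in migration_entry_wait() until
remove_migration_ptes() installs a real PTE again.
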
diff --git a/mm/migrate.c b/mm/migrate.c
index 38e7cad782f4..55dbc45880c6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -32,6 +32,7 @@
 #include <linux/security.h>
 #include <linux/memcontrol.h>
 #include <linux/syscalls.h>
+#include <linux/hugetlb.h>
 #include <linux/gfp.h>
 
 #include "internal.h"
@@ -95,26 +96,34 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte_t *ptep, pte;
 	spinlock_t *ptl;
 
-	pgd = pgd_offset(mm, addr);
-	if (!pgd_present(*pgd))
-		goto out;
+	if (unlikely(PageHuge(new))) {
+		ptep = huge_pte_offset(mm, addr);
+		if (!ptep)
+			goto out;
+		ptl = &mm->page_table_lock;
+	} else {
+		pgd = pgd_offset(mm, addr);
+		if (!pgd_present(*pgd))
+			goto out;
 
-	pud = pud_offset(pgd, addr);
-	if (!pud_present(*pud))
-		goto out;
+		pud = pud_offset(pgd, addr);
+		if (!pud_present(*pud))
+			goto out;
 
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
-		goto out;
+		pmd = pmd_offset(pud, addr);
+		if (!pmd_present(*pmd))
+			goto out;
 
-	ptep = pte_offset_map(pmd, addr);
+		ptep = pte_offset_map(pmd, addr);
 
-	if (!is_swap_pte(*ptep)) {
-		pte_unmap(ptep);
-		goto out;
-	}
+		if (!is_swap_pte(*ptep)) {
+			pte_unmap(ptep);
+			goto out;
+		}
+
+		ptl = pte_lockptr(mm, pmd);
+	}
 
-	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	pte = *ptep;
 	if (!is_swap_pte(pte))
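The locking asymmetry above exists because hugetlb page tables are not covered
by split PTE locks; pte_lockptr() reads approximately as follows (from
include/linux/mm.h of this era, shown for reference):

	#if USE_SPLIT_PTLOCKS
	#define pte_lockptr(mm, pmd)	({(void)(mm); __pte_lockptr(pmd_page(*(pmd))); })
	#else
	#define pte_lockptr(mm, pmd)	({(void)(pmd); &(mm)->page_table_lock; })
	#endif

so the hugepage branch has to name mm->page_table_lock explicitly.
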
@@ -130,10 +139,17 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
 		pte = pte_mkwrite(pte);
+	if (PageHuge(new))
+		pte = pte_mkhuge(pte);
 	flush_cache_page(vma, addr, pte_pfn(pte));
 	set_pte_at(mm, addr, ptep, pte);
 
-	if (PageAnon(new))
+	if (PageHuge(new)) {
+		if (PageAnon(new))
+			hugepage_add_anon_rmap(new, vma, addr);
+		else
+			page_dup_rmap(new);
+	} else if (PageAnon(new))
 		page_add_anon_rmap(new, vma, addr);
 	else
 		page_add_file_rmap(new);
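For file-backed hugepages, page_dup_rmap() above is only a mapcount bump (from
include/linux/rmap.h of this era):

	static inline void page_dup_rmap(struct page *page)
	{
		atomic_inc(&page->_mapcount);
	}

No LRU or zone-stat accounting applies to hugepages here, which is why the
hugepage branch bypasses page_add_file_rmap().
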
@@ -276,11 +292,59 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 }
 
 /*
+ * The expected number of remaining references is the same as that
+ * of migrate_page_move_mapping().
+ */
+int migrate_huge_page_move_mapping(struct address_space *mapping,
+				   struct page *newpage, struct page *page)
+{
+	int expected_count;
+	void **pslot;
+
+	if (!mapping) {
+		if (page_count(page) != 1)
+			return -EAGAIN;
+		return 0;
+	}
+
+	spin_lock_irq(&mapping->tree_lock);
+
+	pslot = radix_tree_lookup_slot(&mapping->page_tree,
+					page_index(page));
+
+	expected_count = 2 + page_has_private(page);
+	if (page_count(page) != expected_count ||
+	    (struct page *)radix_tree_deref_slot(pslot) != page) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	if (!page_freeze_refs(page, expected_count)) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	get_page(newpage);
+
+	radix_tree_replace_slot(pslot, newpage);
+
+	page_unfreeze_refs(page, expected_count);
+
+	__put_page(page);
+
+	spin_unlock_irq(&mapping->tree_lock);
+	return 0;
+}
+
+/*
  * Copy the page to its new location
  */
-static void migrate_page_copy(struct page *newpage, struct page *page)
+void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	copy_highpage(newpage, page);
+	if (PageHuge(page))
+		copy_huge_page(newpage, page);
+	else
+		copy_highpage(newpage, page);
 
 	if (PageError(page))
 		SetPageError(newpage);
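The freeze/unfreeze pair above is what makes replacing the radix-tree slot
safe: page_freeze_refs() succeeds only if the refcount is exactly the expected
quiescent value, so no concurrent user can hold or gain a reference while the
slot is switched. Its definition at the time (include/linux/pagemap.h) was
essentially:

	static inline int page_freeze_refs(struct page *page, int count)
	{
		return likely(atomic_cmpxchg(&page->_count, count, 0) == count);
	}
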
@@ -724,6 +788,92 @@ move_newpage:
 }
 
 /*
+ * Counterpart of unmap_and_move() for hugepage migration.
+ *
+ * This function doesn't wait for the completion of hugepage I/O
+ * because there is no race between I/O and migration for hugepages.
+ * Note that currently hugepage I/O occurs only in direct I/O
+ * where no lock is held and PG_writeback is irrelevant,
+ * and the writeback status of all subpages is counted in the reference
+ * count of the head page (i.e. if all subpages of a 2MB hugepage are
+ * under direct I/O, the reference count of the head page is 512 and a bit more.)
+ * This means that when we try to migrate a hugepage whose subpages are
+ * doing direct I/O, some references remain after try_to_unmap() and
+ * hugepage migration fails without data corruption.
+ *
+ * There is also no race when direct I/O is issued on a page under migration,
+ * because its PTE has been replaced with a migration swap entry and the
+ * direct I/O code will wait in the page fault for migration to complete.
+ */
+static int unmap_and_move_huge_page(new_page_t get_new_page,
+				unsigned long private, struct page *hpage,
+				int force, int offlining)
+{
+	int rc = 0;
+	int *result = NULL;
+	struct page *new_hpage = get_new_page(hpage, private, &result);
+	int rcu_locked = 0;
+	struct anon_vma *anon_vma = NULL;
+
+	if (!new_hpage)
+		return -ENOMEM;
+
+	rc = -EAGAIN;
+
+	if (!trylock_page(hpage)) {
+		if (!force)
+			goto out;
+		lock_page(hpage);
+	}
+
+	if (PageAnon(hpage)) {
+		rcu_read_lock();
+		rcu_locked = 1;
+
+		if (page_mapped(hpage)) {
+			anon_vma = page_anon_vma(hpage);
+			atomic_inc(&anon_vma->external_refcount);
+		}
+	}
+
+	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+
+	if (!page_mapped(hpage))
+		rc = move_to_new_page(new_hpage, hpage, 1);
+
+	if (rc)
+		remove_migration_ptes(hpage, hpage);
+
+	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount,
+					    &anon_vma->lock)) {
+		int empty = list_empty(&anon_vma->head);
+		spin_unlock(&anon_vma->lock);
+		if (empty)
+			anon_vma_free(anon_vma);
+	}
+
+	if (rcu_locked)
+		rcu_read_unlock();
+
+	unlock_page(hpage);
+out:
+	if (rc != -EAGAIN) {
+		list_del(&hpage->lru);
+		put_page(hpage);
+	}
+
+	put_page(new_hpage);
+
+	if (result) {
+		if (rc)
+			*result = rc;
+		else
+			*result = page_to_nid(new_hpage);
+	}
+	return rc;
+}
+
+/*
  * migrate_pages
  *
  * The function takes one list of pages to migrate and a function
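To make the reference arithmetic in the comment above concrete, a hypothetical
helper (illustrative only, not part of the patch) expressing the condition
under which migration backs off:

	/*
	 * The quiescent count for a mapped-file hugepage at this point is
	 * 2 (isolation ref + radix tree) plus page_has_private(); with all
	 * 512 subpages of a 2MB hugepage under direct I/O, the head page
	 * carries roughly 512 extra references, this returns true, and
	 * migrate_huge_page_move_mapping() bails out with -EAGAIN.
	 */
	static int hugepage_has_extra_refs(struct page *hpage)
	{
		return page_count(hpage) > 2 + page_has_private(hpage);
	}
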
@@ -788,6 +938,52 @@ out:
 	return nr_failed + retry;
 }
 
+int migrate_huge_pages(struct list_head *from,
+		new_page_t get_new_page, unsigned long private, int offlining)
+{
+	int retry = 1;
+	int nr_failed = 0;
+	int pass = 0;
+	struct page *page;
+	struct page *page2;
+	int rc;
+
+	for (pass = 0; pass < 10 && retry; pass++) {
+		retry = 0;
+
+		list_for_each_entry_safe(page, page2, from, lru) {
+			cond_resched();
+
+			rc = unmap_and_move_huge_page(get_new_page,
+					private, page, pass > 2, offlining);
+
+			switch (rc) {
+			case -ENOMEM:
+				goto out;
+			case -EAGAIN:
+				retry++;
+				break;
+			case 0:
+				break;
+			default:
+				/* Permanent failure */
+				nr_failed++;
+				break;
+			}
+		}
+	}
+	rc = 0;
+out:
+
+	list_for_each_entry_safe(page, page2, from, lru)
+		put_page(page);
+
+	if (rc)
+		return rc;
+
+	return nr_failed + retry;
+}
+
 #ifdef CONFIG_NUMA
 /*
  * Move a list of individual pages
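
A sketch of how migrate_huge_pages() is meant to be driven, modeled on the
soft-offline path added alongside this series (the new_page allocator callback
name is illustrative):

	/* hpage: head page of the hugepage, with a reference already held */
	LIST_HEAD(pagelist);
	int ret;

	list_add(&hpage->lru, &pagelist);
	ret = migrate_huge_pages(&pagelist, new_page, 0, 0);
	if (ret)
		pr_info("hugepage migration returned %d\n", ret);

A positive return value counts permanent failures plus pages still retrying
after the final pass; a negative value is a hard error such as -ENOMEM.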
