aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/hugetlbfs/inode.c15
-rw-r--r--include/linux/migrate.h16
-rw-r--r--mm/hugetlb.c18
-rw-r--r--mm/migrate.c232
4 files changed, 262 insertions, 19 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 6e5bd42f3860..1f7ca505d48e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -31,6 +31,7 @@
31#include <linux/statfs.h> 31#include <linux/statfs.h>
32#include <linux/security.h> 32#include <linux/security.h>
33#include <linux/magic.h> 33#include <linux/magic.h>
34#include <linux/migrate.h>
34 35
35#include <asm/uaccess.h> 36#include <asm/uaccess.h>
36 37
@@ -573,6 +574,19 @@ static int hugetlbfs_set_page_dirty(struct page *page)
573 return 0; 574 return 0;
574} 575}
575 576
/*
 * Page-migration hook for hugetlbfs (installed as ->migratepage in
 * hugetlbfs_aops).
 *
 * First hands the mapping slot over from @page to @newpage; only when that
 * succeeds is the page content/state copied across.
 *
 * Returns 0 on success, or the non-zero code reported by
 * migrate_huge_page_move_mapping() (in which case nothing is copied).
 */
static int hugetlbfs_migrate_page(struct address_space *mapping,
				struct page *newpage, struct page *page)
{
	int err = migrate_huge_page_move_mapping(mapping, newpage, page);

	/* Copy only after the mapping has been switched to the new page. */
	if (!err)
		migrate_page_copy(newpage, page);

	return err;
}
589
576static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) 590static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
577{ 591{
578 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); 592 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
@@ -659,6 +673,7 @@ static const struct address_space_operations hugetlbfs_aops = {
659 .write_begin = hugetlbfs_write_begin, 673 .write_begin = hugetlbfs_write_begin,
660 .write_end = hugetlbfs_write_end, 674 .write_end = hugetlbfs_write_end,
661 .set_page_dirty = hugetlbfs_set_page_dirty, 675 .set_page_dirty = hugetlbfs_set_page_dirty,
676 .migratepage = hugetlbfs_migrate_page,
662}; 677};
663 678
664 679
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 7238231b8dd4..3c1941e40e61 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -14,6 +14,8 @@ extern int migrate_page(struct address_space *,
14 struct page *, struct page *); 14 struct page *, struct page *);
15extern int migrate_pages(struct list_head *l, new_page_t x, 15extern int migrate_pages(struct list_head *l, new_page_t x,
16 unsigned long private, int offlining); 16 unsigned long private, int offlining);
17extern int migrate_huge_pages(struct list_head *l, new_page_t x,
18 unsigned long private, int offlining);
17 19
18extern int fail_migrate_page(struct address_space *, 20extern int fail_migrate_page(struct address_space *,
19 struct page *, struct page *); 21 struct page *, struct page *);
@@ -23,12 +25,17 @@ extern int migrate_prep_local(void);
23extern int migrate_vmas(struct mm_struct *mm, 25extern int migrate_vmas(struct mm_struct *mm,
24 const nodemask_t *from, const nodemask_t *to, 26 const nodemask_t *from, const nodemask_t *to,
25 unsigned long flags); 27 unsigned long flags);
28extern void migrate_page_copy(struct page *newpage, struct page *page);
29extern int migrate_huge_page_move_mapping(struct address_space *mapping,
30 struct page *newpage, struct page *page);
26#else 31#else
27#define PAGE_MIGRATION 0 32#define PAGE_MIGRATION 0
28 33
29static inline void putback_lru_pages(struct list_head *l) {} 34static inline void putback_lru_pages(struct list_head *l) {}
30static inline int migrate_pages(struct list_head *l, new_page_t x, 35static inline int migrate_pages(struct list_head *l, new_page_t x,
31 unsigned long private, int offlining) { return -ENOSYS; } 36 unsigned long private, int offlining) { return -ENOSYS; }
37static inline int migrate_huge_pages(struct list_head *l, new_page_t x,
38 unsigned long private, int offlining) { return -ENOSYS; }
32 39
33static inline int migrate_prep(void) { return -ENOSYS; } 40static inline int migrate_prep(void) { return -ENOSYS; }
34static inline int migrate_prep_local(void) { return -ENOSYS; } 41static inline int migrate_prep_local(void) { return -ENOSYS; }
@@ -40,6 +47,15 @@ static inline int migrate_vmas(struct mm_struct *mm,
40 return -ENOSYS; 47 return -ENOSYS;
41} 48}
42 49
/* Stub counterpart of migrate_page_copy(): intentionally does nothing. */
static inline void migrate_page_copy(struct page *newpage,
				     struct page *page)
{
}
52
/*
 * Stub counterpart of migrate_huge_page_move_mapping(): never moves anything
 * and always returns -ENOSYS.
 *
 * This is a function *definition* living in a header, so it must be
 * "static inline" like the other stubs here.  Declaring it "extern" would
 * emit an external definition in every translation unit that includes the
 * header and break the link with multiple-definition errors.
 */
static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
				  struct page *newpage, struct page *page)
{
	return -ENOSYS;
}
58
43/* Possible settings for the migrate_page() method in address_operations */ 59/* Possible settings for the migrate_page() method in address_operations */
44#define migrate_page NULL 60#define migrate_page NULL
45#define fail_migrate_page NULL 61#define fail_migrate_page NULL
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a73dbdcb89eb..0fa9de8361bd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2217,6 +2217,19 @@ nomem:
2217 return -ENOMEM; 2217 return -ENOMEM;
2218} 2218}
2219 2219
2220static int is_hugetlb_entry_migration(pte_t pte)
2221{
2222 swp_entry_t swp;
2223
2224 if (huge_pte_none(pte) || pte_present(pte))
2225 return 0;
2226 swp = pte_to_swp_entry(pte);
2227 if (non_swap_entry(swp) && is_migration_entry(swp)) {
2228 return 1;
2229 } else
2230 return 0;
2231}
2232
2220static int is_hugetlb_entry_hwpoisoned(pte_t pte) 2233static int is_hugetlb_entry_hwpoisoned(pte_t pte)
2221{ 2234{
2222 swp_entry_t swp; 2235 swp_entry_t swp;
@@ -2648,7 +2661,10 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2648 ptep = huge_pte_offset(mm, address); 2661 ptep = huge_pte_offset(mm, address);
2649 if (ptep) { 2662 if (ptep) {
2650 entry = huge_ptep_get(ptep); 2663 entry = huge_ptep_get(ptep);
2651 if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) 2664 if (unlikely(is_hugetlb_entry_migration(entry))) {
2665 migration_entry_wait(mm, (pmd_t *)ptep, address);
2666 return 0;
2667 } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
2652 return VM_FAULT_HWPOISON; 2668 return VM_FAULT_HWPOISON;
2653 } 2669 }
2654 2670
diff --git a/mm/migrate.c b/mm/migrate.c
index 38e7cad782f4..55dbc45880c6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -32,6 +32,7 @@
32#include <linux/security.h> 32#include <linux/security.h>
33#include <linux/memcontrol.h> 33#include <linux/memcontrol.h>
34#include <linux/syscalls.h> 34#include <linux/syscalls.h>
35#include <linux/hugetlb.h>
35#include <linux/gfp.h> 36#include <linux/gfp.h>
36 37
37#include "internal.h" 38#include "internal.h"
@@ -95,26 +96,34 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
95 pte_t *ptep, pte; 96 pte_t *ptep, pte;
96 spinlock_t *ptl; 97 spinlock_t *ptl;
97 98
98 pgd = pgd_offset(mm, addr); 99 if (unlikely(PageHuge(new))) {
99 if (!pgd_present(*pgd)) 100 ptep = huge_pte_offset(mm, addr);
100 goto out; 101 if (!ptep)
102 goto out;
103 ptl = &mm->page_table_lock;
104 } else {
105 pgd = pgd_offset(mm, addr);
106 if (!pgd_present(*pgd))
107 goto out;
101 108
102 pud = pud_offset(pgd, addr); 109 pud = pud_offset(pgd, addr);
103 if (!pud_present(*pud)) 110 if (!pud_present(*pud))
104 goto out; 111 goto out;
105 112
106 pmd = pmd_offset(pud, addr); 113 pmd = pmd_offset(pud, addr);
107 if (!pmd_present(*pmd)) 114 if (!pmd_present(*pmd))
108 goto out; 115 goto out;
109 116
110 ptep = pte_offset_map(pmd, addr); 117 ptep = pte_offset_map(pmd, addr);
111 118
112 if (!is_swap_pte(*ptep)) { 119 if (!is_swap_pte(*ptep)) {
113 pte_unmap(ptep); 120 pte_unmap(ptep);
114 goto out; 121 goto out;
115 } 122 }
123
124 ptl = pte_lockptr(mm, pmd);
125 }
116 126
117 ptl = pte_lockptr(mm, pmd);
118 spin_lock(ptl); 127 spin_lock(ptl);
119 pte = *ptep; 128 pte = *ptep;
120 if (!is_swap_pte(pte)) 129 if (!is_swap_pte(pte))
@@ -130,10 +139,17 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
130 pte = pte_mkold(mk_pte(new, vma->vm_page_prot)); 139 pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
131 if (is_write_migration_entry(entry)) 140 if (is_write_migration_entry(entry))
132 pte = pte_mkwrite(pte); 141 pte = pte_mkwrite(pte);
142 if (PageHuge(new))
143 pte = pte_mkhuge(pte);
133 flush_cache_page(vma, addr, pte_pfn(pte)); 144 flush_cache_page(vma, addr, pte_pfn(pte));
134 set_pte_at(mm, addr, ptep, pte); 145 set_pte_at(mm, addr, ptep, pte);
135 146
136 if (PageAnon(new)) 147 if (PageHuge(new)) {
148 if (PageAnon(new))
149 hugepage_add_anon_rmap(new, vma, addr);
150 else
151 page_dup_rmap(new);
152 } else if (PageAnon(new))
137 page_add_anon_rmap(new, vma, addr); 153 page_add_anon_rmap(new, vma, addr);
138 else 154 else
139 page_add_file_rmap(new); 155 page_add_file_rmap(new);
@@ -276,11 +292,59 @@ static int migrate_page_move_mapping(struct address_space *mapping,
276} 292}
277 293
278/* 294/*
295 * The expected number of remaining references is the same as that
296 * of migrate_page_move_mapping().
297 */
298int migrate_huge_page_move_mapping(struct address_space *mapping,
299 struct page *newpage, struct page *page)
300{
301 int expected_count;
302 void **pslot;
303
304 if (!mapping) {
305 if (page_count(page) != 1)
306 return -EAGAIN;
307 return 0;
308 }
309
310 spin_lock_irq(&mapping->tree_lock);
311
312 pslot = radix_tree_lookup_slot(&mapping->page_tree,
313 page_index(page));
314
315 expected_count = 2 + page_has_private(page);
316 if (page_count(page) != expected_count ||
317 (struct page *)radix_tree_deref_slot(pslot) != page) {
318 spin_unlock_irq(&mapping->tree_lock);
319 return -EAGAIN;
320 }
321
322 if (!page_freeze_refs(page, expected_count)) {
323 spin_unlock_irq(&mapping->tree_lock);
324 return -EAGAIN;
325 }
326
327 get_page(newpage);
328
329 radix_tree_replace_slot(pslot, newpage);
330
331 page_unfreeze_refs(page, expected_count);
332
333 __put_page(page);
334
335 spin_unlock_irq(&mapping->tree_lock);
336 return 0;
337}
338
339/*
279 * Copy the page to its new location 340 * Copy the page to its new location
280 */ 341 */
281static void migrate_page_copy(struct page *newpage, struct page *page) 342void migrate_page_copy(struct page *newpage, struct page *page)
282{ 343{
283 copy_highpage(newpage, page); 344 if (PageHuge(page))
345 copy_huge_page(newpage, page);
346 else
347 copy_highpage(newpage, page);
284 348
285 if (PageError(page)) 349 if (PageError(page))
286 SetPageError(newpage); 350 SetPageError(newpage);
@@ -724,6 +788,92 @@ move_newpage:
724} 788}
725 789
726/* 790/*
791 * Counterpart of unmap_and_move_page() for hugepage migration.
792 *
793 * This function doesn't wait for the completion of hugepage I/O
794 * because there is no race between I/O and migration for hugepage.
795 * Note that currently hugepage I/O occurs only in direct I/O
796 * where no lock is held and PG_writeback is irrelevant,
797 * and writeback status of all subpages are counted in the reference
798 * count of the head page (i.e. if all subpages of a 2MB hugepage are
799 * under direct I/O, the reference of the head page is 512 and a bit more.)
800 * This means that when we try to migrate hugepage whose subpages are
801 * doing direct I/O, some references remain after try_to_unmap() and
802 * hugepage migration fails without data corruption.
803 *
804 * There is also no race when direct I/O is issued on the page under migration,
805 * because then pte is replaced with migration swap entry and direct I/O code
806 * will wait in the page fault for migration to complete.
807 */
808static int unmap_and_move_huge_page(new_page_t get_new_page,
809 unsigned long private, struct page *hpage,
810 int force, int offlining)
811{
812 int rc = 0;
813 int *result = NULL;
814 struct page *new_hpage = get_new_page(hpage, private, &result);
815 int rcu_locked = 0;
816 struct anon_vma *anon_vma = NULL;
817
818 if (!new_hpage)
819 return -ENOMEM;
820
821 rc = -EAGAIN;
822
823 if (!trylock_page(hpage)) {
824 if (!force)
825 goto out;
826 lock_page(hpage);
827 }
828
829 if (PageAnon(hpage)) {
830 rcu_read_lock();
831 rcu_locked = 1;
832
833 if (page_mapped(hpage)) {
834 anon_vma = page_anon_vma(hpage);
835 atomic_inc(&anon_vma->external_refcount);
836 }
837 }
838
839 try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
840
841 if (!page_mapped(hpage))
842 rc = move_to_new_page(new_hpage, hpage, 1);
843
844 if (rc)
845 remove_migration_ptes(hpage, hpage);
846
847 if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount,
848 &anon_vma->lock)) {
849 int empty = list_empty(&anon_vma->head);
850 spin_unlock(&anon_vma->lock);
851 if (empty)
852 anon_vma_free(anon_vma);
853 }
854
855 if (rcu_locked)
856 rcu_read_unlock();
857out:
858 unlock_page(hpage);
859
860 if (rc != -EAGAIN) {
861 list_del(&hpage->lru);
862 put_page(hpage);
863 }
864
865 put_page(new_hpage);
866
867 if (result) {
868 if (rc)
869 *result = rc;
870 else
871 *result = page_to_nid(new_hpage);
872 }
873 return rc;
874}
875
876/*
727 * migrate_pages 877 * migrate_pages
728 * 878 *
729 * The function takes one list of pages to migrate and a function 879 * The function takes one list of pages to migrate and a function
@@ -788,6 +938,52 @@ out:
788 return nr_failed + retry; 938 return nr_failed + retry;
789} 939}
790 940
941int migrate_huge_pages(struct list_head *from,
942 new_page_t get_new_page, unsigned long private, int offlining)
943{
944 int retry = 1;
945 int nr_failed = 0;
946 int pass = 0;
947 struct page *page;
948 struct page *page2;
949 int rc;
950
951 for (pass = 0; pass < 10 && retry; pass++) {
952 retry = 0;
953
954 list_for_each_entry_safe(page, page2, from, lru) {
955 cond_resched();
956
957 rc = unmap_and_move_huge_page(get_new_page,
958 private, page, pass > 2, offlining);
959
960 switch(rc) {
961 case -ENOMEM:
962 goto out;
963 case -EAGAIN:
964 retry++;
965 break;
966 case 0:
967 break;
968 default:
969 /* Permanent failure */
970 nr_failed++;
971 break;
972 }
973 }
974 }
975 rc = 0;
976out:
977
978 list_for_each_entry_safe(page, page2, from, lru)
979 put_page(page);
980
981 if (rc)
982 return rc;
983
984 return nr_failed + retry;
985}
986
791#ifdef CONFIG_NUMA 987#ifdef CONFIG_NUMA
792/* 988/*
793 * Move a list of individual pages 989 * Move a list of individual pages