Diffstat (limited to 'mm/migrate.c')
-rw-r--r--   mm/migrate.c   177
1 file changed, 65 insertions, 112 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 7dbcb22316d2..d3f3f7f81075 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -21,6 +21,7 @@
 #include <linux/mm_inline.h>
 #include <linux/nsproxy.h>
 #include <linux/pagevec.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/topology.h>
 #include <linux/cpu.h>
@@ -31,6 +32,7 @@
 #include <linux/security.h>
 #include <linux/memcontrol.h>
 #include <linux/syscalls.h>
+#include <linux/gfp.h>

 #include "internal.h"

@@ -78,8 +80,8 @@ int putback_lru_pages(struct list_head *l)
 /*
  * Restore a potential migration pte to a working pte entry
  */
-static void remove_migration_pte(struct vm_area_struct *vma,
-		struct page *old, struct page *new)
+static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
+		unsigned long addr, void *old)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	swp_entry_t entry;
@@ -88,40 +90,37 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 	spinlock_t *ptl;
-	unsigned long addr = page_address_in_vma(new, vma);
-
-	if (addr == -EFAULT)
-		return;

 	pgd = pgd_offset(mm, addr);
 	if (!pgd_present(*pgd))
-		return;
+		goto out;

 	pud = pud_offset(pgd, addr);
 	if (!pud_present(*pud))
-		return;
+		goto out;

 	pmd = pmd_offset(pud, addr);
 	if (!pmd_present(*pmd))
-		return;
+		goto out;

 	ptep = pte_offset_map(pmd, addr);

 	if (!is_swap_pte(*ptep)) {
 		pte_unmap(ptep);
-		return;
+		goto out;
 	}

 	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	pte = *ptep;
 	if (!is_swap_pte(pte))
-		goto out;
+		goto unlock;

 	entry = pte_to_swp_entry(pte);

-	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
-		goto out;
+	if (!is_migration_entry(entry) ||
+	    migration_entry_to_page(entry) != old)
+		goto unlock;

 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
@@ -136,59 +135,11 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 	page_add_file_rmap(new);

 	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, addr, pte);
-
-out:
+	update_mmu_cache(vma, addr, ptep);
+unlock:
 	pte_unmap_unlock(ptep, ptl);
-}
-
-/*
- * Note that remove_file_migration_ptes will only work on regular mappings,
- * Nonlinear mappings do not use migration entries.
- */
-static void remove_file_migration_ptes(struct page *old, struct page *new)
-{
-	struct vm_area_struct *vma;
-	struct address_space *mapping = new->mapping;
-	struct prio_tree_iter iter;
-	pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-
-	if (!mapping)
-		return;
-
-	spin_lock(&mapping->i_mmap_lock);
-
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
-		remove_migration_pte(vma, old, new);
-
-	spin_unlock(&mapping->i_mmap_lock);
-}
-
-/*
- * Must hold mmap_sem lock on at least one of the vmas containing
- * the page so that the anon_vma cannot vanish.
- */
-static void remove_anon_migration_ptes(struct page *old, struct page *new)
-{
-	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
-	unsigned long mapping;
-
-	mapping = (unsigned long)new->mapping;
-
-	if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
-		return;
-
-	/*
-	 * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
-	 */
-	anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
-	spin_lock(&anon_vma->lock);
-
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
-		remove_migration_pte(vma, old, new);
-
-	spin_unlock(&anon_vma->lock);
+out:
+	return SWAP_AGAIN;
 }

 /*
@@ -197,10 +148,7 @@ static void remove_anon_migration_ptes(struct page *old, struct page *new)
  */
 static void remove_migration_ptes(struct page *old, struct page *new)
 {
-	if (PageAnon(new))
-		remove_anon_migration_ptes(old, new);
-	else
-		remove_file_migration_ptes(old, new);
+	rmap_walk(new, remove_migration_pte, old);
 }

 /*
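The hunks above are the heart of the cleanup: the dedicated anon (anon_vma list) and file (i_mmap prio tree) walkers are deleted, and remove_migration_ptes() simply hands remove_migration_pte() to rmap_walk() as a per-mapping callback. As a rough illustration of that callback shape only, here is a small self-contained userspace sketch; it is not part of the patch, and mapping_t, walk_mappings() and restore_one() are hypothetical stand-ins for vm_area_struct, rmap_walk() and remove_migration_pte().

#include <stdio.h>

typedef struct {
        const char *name;     /* stands in for a vm_area_struct */
        unsigned long addr;   /* where the old page was mapped in it */
} mapping_t;

/*
 * Hypothetical stand-in for rmap_walk(): visit every mapping of the page
 * and apply one callback, instead of open-coding an anon and a file walker.
 */
static int walk_mappings(void *new, mapping_t *maps, int nmaps,
                         int (*walk_one)(void *new, mapping_t *m,
                                         unsigned long addr, void *old),
                         void *old)
{
        int i, ret = 0;

        for (i = 0; i < nmaps && ret == 0; i++)
                ret = walk_one(new, &maps[i], maps[i].addr, old);
        return ret;
}

/*
 * Stand-in for remove_migration_pte(): note the (new, mapping, addr, old)
 * argument order and the "keep walking" return value, mirroring the new
 * callback signature introduced above.
 */
static int restore_one(void *new, mapping_t *m, unsigned long addr, void *old)
{
        printf("%s: restore pte at %#lx, %p -> %p\n", m->name, addr, old, new);
        return 0;       /* plays the role of SWAP_AGAIN: keep going */
}

int main(void)
{
        mapping_t maps[] = { { "vma A", 0x1000 }, { "vma B", 0x7f0000 } };
        int oldpage, newpage;

        /* One generic call replaces the PageAnon()/file special cases. */
        walk_mappings(&newpage, maps, 2, restore_one, &oldpage);
        return 0;
}

Even in the toy, the design benefit shows: callers no longer care whether a mapping is anonymous or file-backed, and new kinds of mappings (such as the KSM pages this series also teaches migration about) only need handling inside the walk, not in every caller.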
@@ -328,8 +276,6 @@ static int migrate_page_move_mapping(struct address_space *mapping,
  */
 static void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	int anon;
-
 	copy_highpage(newpage, page);

 	if (PageError(page))
@@ -341,8 +287,8 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 	if (TestClearPageActive(page)) {
 		VM_BUG_ON(PageUnevictable(page));
 		SetPageActive(newpage);
-	} else
-		unevictable_migrate_page(newpage, page);
+	} else if (TestClearPageUnevictable(page))
+		SetPageUnevictable(newpage);
 	if (PageChecked(page))
 		SetPageChecked(newpage);
 	if (PageMappedToDisk(page))
@@ -361,12 +307,11 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 	}

 	mlock_migrate_page(newpage, page);
+	ksm_migrate_page(newpage, page);

 	ClearPageSwapCache(page);
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
-	/* page->mapping contains a flag for PageAnon() */
-	anon = PageAnon(page);
 	page->mapping = NULL;

 	/*
@@ -580,9 +525,9 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 	else
 		rc = fallback_migrate_page(mapping, newpage, page);

-	if (!rc) {
+	if (!rc)
 		remove_migration_ptes(page, newpage);
-	} else
+	else
 		newpage->mapping = NULL;

 	unlock_page(newpage);
@@ -595,7 +540,7 @@ static int move_to_new_page(struct page *newpage, struct page *page)
  * to the newly allocated page in newpage.
  */
 static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force)
+			struct page *page, int force, int offlining)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -621,6 +566,20 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		lock_page(page);
 	}

+	/*
+	 * Only memory hotplug's offline_pages() caller has locked out KSM,
+	 * and can safely migrate a KSM page. The other cases have skipped
+	 * PageKsm along with PageReserved - but it is only now when we have
+	 * the page lock that we can be certain it will not go KSM beneath us
+	 * (KSM will not upgrade a page from PageAnon to PageKsm when it sees
+	 * its pagecount raised, but only here do we take the page lock which
+	 * serializes that).
+	 */
+	if (PageKsm(page) && !offlining) {
+		rc = -EBUSY;
+		goto unlock;
+	}
+
 	/* charge against new page */
 	charge = mem_cgroup_prepare_migration(page, &mem);
 	if (charge == -ENOMEM) {
@@ -737,7 +696,7 @@ move_newpage:
  * Return: Number of pages not migrated or error code.
  */
 int migrate_pages(struct list_head *from,
-		new_page_t get_new_page, unsigned long private)
+		new_page_t get_new_page, unsigned long private, int offlining)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -746,13 +705,6 @@ int migrate_pages(struct list_head *from,
 	struct page *page2;
 	int swapwrite = current->flags & PF_SWAPWRITE;
 	int rc;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	list_for_each_entry(page, from, lru)
-		__inc_zone_page_state(page, NR_ISOLATED_ANON +
-				page_is_file_cache(page));
-	local_irq_restore(flags);

 	if (!swapwrite)
 		current->flags |= PF_SWAPWRITE;
@@ -764,7 +716,7 @@ int migrate_pages(struct list_head *from,
 			cond_resched();

 			rc = unmap_and_move(get_new_page, private,
-						page, pass > 2);
+						page, pass > 2, offlining);

 			switch(rc) {
 			case -ENOMEM:
@@ -860,7 +812,8 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 		if (!page)
 			goto set_status;

-		if (PageReserved(page))		/* Check for zero page */
+		/* Use PageReserved to check for zero page */
+		if (PageReserved(page) || PageKsm(page))
 			goto put_and_set;

 		pp->page = page;
@@ -878,8 +831,11 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 			goto put_and_set;

 		err = isolate_lru_page(page);
-		if (!err)
+		if (!err) {
 			list_add_tail(&page->lru, &pagelist);
+			inc_zone_page_state(page, NR_ISOLATED_ANON +
+					page_is_file_cache(page));
+		}
 put_and_set:
 		/*
 		 * Either remove the duplicate refcount from
@@ -894,7 +850,7 @@ set_status:
 	err = 0;
 	if (!list_empty(&pagelist))
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm);
+				(unsigned long)pm, 0);

 	up_read(&mm->mmap_sem);
 	return err;
@@ -953,6 +909,9 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
 			goto out_pm;

 		err = -ENODEV;
+		if (node < 0 || node >= MAX_NUMNODES)
+			goto out_pm;
+
 		if (!node_state(node, N_HIGH_MEMORY))
 			goto out_pm;

@@ -1015,7 +974,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,

 		err = -ENOENT;
 		/* Use PageReserved to check for zero page */
-		if (!page || PageReserved(page))
+		if (!page || PageReserved(page) || PageKsm(page))
 			goto set_status;

 		err = page_to_nid(page);
@@ -1040,33 +999,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
 #define DO_PAGES_STAT_CHUNK_NR 16
 	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
 	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
-	unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR;
-	int err;

-	for (i = 0; i < nr_pages; i += chunk_nr) {
-		if (chunk_nr + i > nr_pages)
-			chunk_nr = nr_pages - i;
+	while (nr_pages) {
+		unsigned long chunk_nr;

-		err = copy_from_user(chunk_pages, &pages[i],
-				chunk_nr * sizeof(*chunk_pages));
-		if (err) {
-			err = -EFAULT;
-			goto out;
-		}
+		chunk_nr = nr_pages;
+		if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
+			chunk_nr = DO_PAGES_STAT_CHUNK_NR;
+
+		if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
+			break;

 		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);

-		err = copy_to_user(&status[i], chunk_status,
-				chunk_nr * sizeof(*chunk_status));
-		if (err) {
-			err = -EFAULT;
-			goto out;
-		}
-	}
-	err = 0;
+		if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
+			break;

-out:
-	return err;
+		pages += chunk_nr;
+		status += chunk_nr;
+		nr_pages -= chunk_nr;
+	}
+	return nr_pages ? -EFAULT : 0;
 }

 /*
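The later hunks harden the move_pages(2) paths behind this file: do_pages_move() now rejects out-of-range node numbers before using them to index node state, do_move_page_to_node_array() and do_pages_stat_array() skip KSM pages alongside the zero page, and do_pages_stat() copies the user's pointer and status arrays in bounded chunks. Below is a minimal userspace sketch of the syscall these paths serve; it is not part of the patch, it assumes libnuma's move_pages() wrapper is available (build with -lnuma), and NPAGES and the target node 0 are purely illustrative.

/* Hedged example: query and move a few of our own pages with move_pages(2). */
#include <numaif.h>      /* move_pages(), MPOL_MF_MOVE */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define NPAGES 4

int main(void)
{
        long page_size = sysconf(_SC_PAGESIZE);
        void *pages[NPAGES];
        int nodes[NPAGES];
        int status[NPAGES];
        int i;

        /* Allocate and touch NPAGES separate pages so they are faulted in. */
        for (i = 0; i < NPAGES; i++) {
                if (posix_memalign(&pages[i], page_size, page_size))
                        return 1;
                memset(pages[i], 0, page_size);
                nodes[i] = 0;           /* illustrative target: node 0 */
        }

        /* nodes == NULL: status-only query, served by do_pages_stat(). */
        if (move_pages(0, NPAGES, pages, NULL, status, MPOL_MF_MOVE) == 0)
                for (i = 0; i < NPAGES; i++)
                        printf("page %d currently on node %d\n", i, status[i]);
        else
                fprintf(stderr, "move_pages status query failed\n");

        /* nodes != NULL: migration request, served by do_pages_move(). */
        if (move_pages(0, NPAGES, pages, nodes, status, MPOL_MF_MOVE) == 0)
                for (i = 0; i < NPAGES; i++)
                        printf("page %d: status %d after move\n", i, status[i]);
        else
                fprintf(stderr, "move_pages migration failed\n");

        return 0;
}

Passing nodes == NULL takes the kernel's do_pages_stat() query path; passing a node array takes do_pages_move(), where an out-of-range node now fails cleanly (ENODEV in this version) instead of indexing node state beyond MAX_NUMNODES.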