Diffstat (limited to 'mm/migrate.c')
-rw-r--r--  mm/migrate.c  135
1 file changed, 47 insertions(+), 88 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 1a4bf4813780..efddbf0926b2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -21,6 +21,7 @@
 #include <linux/mm_inline.h>
 #include <linux/nsproxy.h>
 #include <linux/pagevec.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/topology.h>
 #include <linux/cpu.h>
@@ -78,8 +79,8 @@ int putback_lru_pages(struct list_head *l)
 /*
  * Restore a potential migration pte to a working pte entry
  */
-static void remove_migration_pte(struct vm_area_struct *vma,
-                struct page *old, struct page *new)
+static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
+                                unsigned long addr, void *old)
 {
         struct mm_struct *mm = vma->vm_mm;
         swp_entry_t entry;
@@ -88,40 +89,37 @@ static void remove_migration_pte(struct vm_area_struct *vma,
         pmd_t *pmd;
         pte_t *ptep, pte;
         spinlock_t *ptl;
-        unsigned long addr = page_address_in_vma(new, vma);
-
-        if (addr == -EFAULT)
-                return;
 
         pgd = pgd_offset(mm, addr);
         if (!pgd_present(*pgd))
-                return;
+                goto out;
 
         pud = pud_offset(pgd, addr);
         if (!pud_present(*pud))
-                return;
+                goto out;
 
         pmd = pmd_offset(pud, addr);
         if (!pmd_present(*pmd))
-                return;
+                goto out;
 
         ptep = pte_offset_map(pmd, addr);
 
         if (!is_swap_pte(*ptep)) {
                 pte_unmap(ptep);
-                return;
+                goto out;
         }
 
         ptl = pte_lockptr(mm, pmd);
         spin_lock(ptl);
         pte = *ptep;
         if (!is_swap_pte(pte))
-                goto out;
+                goto unlock;
 
         entry = pte_to_swp_entry(pte);
 
-        if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
-                goto out;
+        if (!is_migration_entry(entry) ||
+            migration_entry_to_page(entry) != old)
+                goto unlock;
 
         get_page(new);
         pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
@@ -137,58 +135,10 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 
         /* No need to invalidate - it was non-present before */
         update_mmu_cache(vma, addr, pte);
-
-out:
+unlock:
         pte_unmap_unlock(ptep, ptl);
-}
-
-/*
- * Note that remove_file_migration_ptes will only work on regular mappings,
- * Nonlinear mappings do not use migration entries.
- */
-static void remove_file_migration_ptes(struct page *old, struct page *new)
-{
-        struct vm_area_struct *vma;
-        struct address_space *mapping = new->mapping;
-        struct prio_tree_iter iter;
-        pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-
-        if (!mapping)
-                return;
-
-        spin_lock(&mapping->i_mmap_lock);
-
-        vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
-                remove_migration_pte(vma, old, new);
-
-        spin_unlock(&mapping->i_mmap_lock);
-}
-
-/*
- * Must hold mmap_sem lock on at least one of the vmas containing
- * the page so that the anon_vma cannot vanish.
- */
-static void remove_anon_migration_ptes(struct page *old, struct page *new)
-{
-        struct anon_vma *anon_vma;
-        struct vm_area_struct *vma;
-        unsigned long mapping;
-
-        mapping = (unsigned long)new->mapping;
-
-        if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
-                return;
-
-        /*
-         * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
-         */
-        anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
-        spin_lock(&anon_vma->lock);
-
-        list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
-                remove_migration_pte(vma, old, new);
-
-        spin_unlock(&anon_vma->lock);
+out:
+        return SWAP_AGAIN;
 }
 
 /*
@@ -197,10 +147,7 @@ static void remove_anon_migration_ptes(struct page *old, struct page *new)
  */
 static void remove_migration_ptes(struct page *old, struct page *new)
 {
-        if (PageAnon(new))
-                remove_anon_migration_ptes(old, new);
-        else
-                remove_file_migration_ptes(old, new);
+        rmap_walk(new, remove_migration_pte, old);
 }
 
 /*
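Note on the new callback shape: remove_migration_pte() is now written against the generic rmap_walk() contract in mm/rmap.c. The walker visits every vma that maps the page and hands the callback the page, the vma, the virtual address of that mapping, and an opaque argument (here the old page); returning SWAP_AGAIN tells it to continue with the remaining mappings. A minimal, hypothetical callback following that contract is sketched below; the function name and the pr_debug() are illustrative only and not part of this patch.

/* Hypothetical rmap_walk() callback, for illustration only. */
static int dump_migration_mapping(struct page *page,
                struct vm_area_struct *vma, unsigned long addr, void *arg)
{
        pr_debug("page %p mapped at %#lx in mm %p (old page %p)\n",
                 page, addr, vma->vm_mm, arg);
        return SWAP_AGAIN;      /* keep walking the remaining mappings */
}

/* invoked as: rmap_walk(newpage, dump_migration_mapping, oldpage); */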
@@ -341,8 +288,8 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
         if (TestClearPageActive(page)) {
                 VM_BUG_ON(PageUnevictable(page));
                 SetPageActive(newpage);
-        } else
-                unevictable_migrate_page(newpage, page);
+        } else if (TestClearPageUnevictable(page))
+                SetPageUnevictable(newpage);
         if (PageChecked(page))
                 SetPageChecked(newpage);
         if (PageMappedToDisk(page))
@@ -361,6 +308,7 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
         }
 
         mlock_migrate_page(newpage, page);
+        ksm_migrate_page(newpage, page);
 
         ClearPageSwapCache(page);
         ClearPagePrivate(page);
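Note: ksm_migrate_page() comes from the newly included <linux/ksm.h> and gives KSM the chance to move its stable-tree bookkeeping over to the new page. Since migrate_page_copy() calls it unconditionally, the header presumably provides an empty inline stub when CONFIG_KSM is not set, along these lines (an assumed sketch, not code from this patch):

#ifndef CONFIG_KSM
/* Assumed no-op stub so callers need no #ifdef around ksm_migrate_page(). */
static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage)
{
}
#endif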
@@ -580,9 +528,9 @@ static int move_to_new_page(struct page *newpage, struct page *page)
         else
                 rc = fallback_migrate_page(mapping, newpage, page);
 
-        if (!rc) {
+        if (!rc)
                 remove_migration_ptes(page, newpage);
-        } else
+        else
                 newpage->mapping = NULL;
 
         unlock_page(newpage);
@@ -595,14 +543,14 @@ static int move_to_new_page(struct page *newpage, struct page *page)
  * to the newly allocated page in newpage.
  */
 static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-                        struct page *page, int force)
+                        struct page *page, int force, int offlining)
 {
         int rc = 0;
         int *result = NULL;
         struct page *newpage = get_new_page(page, private, &result);
         int rcu_locked = 0;
         int charge = 0;
-        struct mem_cgroup *mem;
+        struct mem_cgroup *mem = NULL;
 
         if (!newpage)
                 return -ENOMEM;
@@ -621,6 +569,20 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
                 lock_page(page);
         }
 
+        /*
+         * Only memory hotplug's offline_pages() caller has locked out KSM,
+         * and can safely migrate a KSM page.  The other cases have skipped
+         * PageKsm along with PageReserved - but it is only now when we have
+         * the page lock that we can be certain it will not go KSM beneath us
+         * (KSM will not upgrade a page from PageAnon to PageKsm when it sees
+         * its pagecount raised, but only here do we take the page lock which
+         * serializes that).
+         */
+        if (PageKsm(page) && !offlining) {
+                rc = -EBUSY;
+                goto unlock;
+        }
+
         /* charge against new page */
         charge = mem_cgroup_prepare_migration(page, &mem);
         if (charge == -ENOMEM) {
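Note: the offlining argument added above threads from migrate_pages() through unmap_and_move() down to this check. Only memory hotplug's offline path, which has already locked out KSM, should pass 1; every call site touched in this diff passes 0, so for those callers a PageKsm page now fails with -EBUSY instead of being migrated. A rough sketch of the two calling conventions (the list and allocator names in the first call are placeholders, not the real call site):

/* memory hotplug's offline path, with KSM locked out: */
err = migrate_pages(&isolated_pages, offline_alloc_target, 0, 1);

/* ordinary callers, e.g. the sys_move_pages() path further down, keep offlining == 0: */
err = migrate_pages(&pagelist, new_page_node, (unsigned long)pm, 0);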
@@ -737,7 +699,7 @@ move_newpage:
  * Return: Number of pages not migrated or error code.
  */
 int migrate_pages(struct list_head *from,
-                new_page_t get_new_page, unsigned long private)
+                new_page_t get_new_page, unsigned long private, int offlining)
 {
         int retry = 1;
         int nr_failed = 0;
@@ -746,13 +708,6 @@ int migrate_pages(struct list_head *from,
         struct page *page2;
         int swapwrite = current->flags & PF_SWAPWRITE;
         int rc;
-        unsigned long flags;
-
-        local_irq_save(flags);
-        list_for_each_entry(page, from, lru)
-                __inc_zone_page_state(page, NR_ISOLATED_ANON +
-                                page_is_file_cache(page));
-        local_irq_restore(flags);
 
         if (!swapwrite)
                 current->flags |= PF_SWAPWRITE;
@@ -764,7 +719,7 @@ int migrate_pages(struct list_head *from,
                         cond_resched();
 
                         rc = unmap_and_move(get_new_page, private,
-                                                page, pass > 2);
+                                                page, pass > 2, offlining);
 
                         switch(rc) {
                         case -ENOMEM:
@@ -860,7 +815,8 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
                 if (!page)
                         goto set_status;
 
-                if (PageReserved(page))         /* Check for zero page */
+                /* Use PageReserved to check for zero page */
+                if (PageReserved(page) || PageKsm(page))
                         goto put_and_set;
 
                 pp->page = page;
@@ -878,8 +834,11 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
                         goto put_and_set;
 
                 err = isolate_lru_page(page);
-                if (!err)
+                if (!err) {
                         list_add_tail(&page->lru, &pagelist);
+                        inc_zone_page_state(page, NR_ISOLATED_ANON +
+                                            page_is_file_cache(page));
+                }
 put_and_set:
                 /*
                  * Either remove the duplicate refcount from
@@ -894,7 +853,7 @@ set_status:
         err = 0;
         if (!list_empty(&pagelist))
                 err = migrate_pages(&pagelist, new_page_node,
-                                (unsigned long)pm);
+                                (unsigned long)pm, 0);
 
         up_read(&mm->mmap_sem);
         return err;
@@ -1015,7 +974,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
 
                 err = -ENOENT;
                 /* Use PageReserved to check for zero page */
-                if (!page || PageReserved(page))
+                if (!page || PageReserved(page) || PageKsm(page))
                         goto set_status;
 
                 err = page_to_nid(page);
@@ -1044,7 +1003,7 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
         int err;
 
         for (i = 0; i < nr_pages; i += chunk_nr) {
-                if (chunk_nr + i > nr_pages)
+                if (chunk_nr > nr_pages - i)
                         chunk_nr = nr_pages - i;
 
                 err = copy_from_user(chunk_pages, &pages[i],