Diffstat (limited to 'mm/migrate.c')

 mm/migrate.c | 177 +++++++++++++++++----------------------------
 1 file changed, 65 insertions(+), 112 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 7dbcb22316d2..d3f3f7f81075 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -21,6 +21,7 @@
 #include <linux/mm_inline.h>
 #include <linux/nsproxy.h>
 #include <linux/pagevec.h>
+#include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/topology.h>
 #include <linux/cpu.h>
@@ -31,6 +32,7 @@
 #include <linux/security.h>
 #include <linux/memcontrol.h>
 #include <linux/syscalls.h>
+#include <linux/gfp.h>
 
 #include "internal.h"
 
@@ -78,8 +80,8 @@ int putback_lru_pages(struct list_head *l)
 /*
  * Restore a potential migration pte to a working pte entry
  */
-static void remove_migration_pte(struct vm_area_struct *vma,
-		struct page *old, struct page *new)
+static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
+				 unsigned long addr, void *old)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	swp_entry_t entry;
@@ -88,40 +90,37 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 	pmd_t *pmd;
 	pte_t *ptep, pte;
 	spinlock_t *ptl;
-	unsigned long addr = page_address_in_vma(new, vma);
-
-	if (addr == -EFAULT)
-		return;
 
 	pgd = pgd_offset(mm, addr);
 	if (!pgd_present(*pgd))
-		return;
+		goto out;
 
 	pud = pud_offset(pgd, addr);
 	if (!pud_present(*pud))
-		return;
+		goto out;
 
 	pmd = pmd_offset(pud, addr);
 	if (!pmd_present(*pmd))
-		return;
+		goto out;
 
 	ptep = pte_offset_map(pmd, addr);
 
 	if (!is_swap_pte(*ptep)) {
 		pte_unmap(ptep);
-		return;
+		goto out;
 	}
 
 	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	pte = *ptep;
 	if (!is_swap_pte(pte))
-		goto out;
+		goto unlock;
 
 	entry = pte_to_swp_entry(pte);
 
-	if (!is_migration_entry(entry) || migration_entry_to_page(entry) != old)
-		goto out;
+	if (!is_migration_entry(entry) ||
+	    migration_entry_to_page(entry) != old)
+		goto unlock;
 
 	get_page(new);
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
@@ -136,59 +135,11 @@ static void remove_migration_pte(struct vm_area_struct *vma,
 	page_add_file_rmap(new);
 
 	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, addr, pte);
-
-out:
+	update_mmu_cache(vma, addr, ptep);
+unlock:
 	pte_unmap_unlock(ptep, ptl);
-}
-
-/*
- * Note that remove_file_migration_ptes will only work on regular mappings,
- * Nonlinear mappings do not use migration entries.
- */
-static void remove_file_migration_ptes(struct page *old, struct page *new)
-{
-	struct vm_area_struct *vma;
-	struct address_space *mapping = new->mapping;
-	struct prio_tree_iter iter;
-	pgoff_t pgoff = new->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-
-	if (!mapping)
-		return;
-
-	spin_lock(&mapping->i_mmap_lock);
-
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
-		remove_migration_pte(vma, old, new);
-
-	spin_unlock(&mapping->i_mmap_lock);
-}
-
-/*
- * Must hold mmap_sem lock on at least one of the vmas containing
- * the page so that the anon_vma cannot vanish.
- */
-static void remove_anon_migration_ptes(struct page *old, struct page *new)
-{
-	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
-	unsigned long mapping;
-
-	mapping = (unsigned long)new->mapping;
-
-	if (!mapping || (mapping & PAGE_MAPPING_ANON) == 0)
-		return;
-
-	/*
-	 * We hold the mmap_sem lock. So no need to call page_lock_anon_vma.
-	 */
-	anon_vma = (struct anon_vma *) (mapping - PAGE_MAPPING_ANON);
-	spin_lock(&anon_vma->lock);
-
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
-		remove_migration_pte(vma, old, new);
-
-	spin_unlock(&anon_vma->lock);
+out:
+	return SWAP_AGAIN;
 }
 
 /*
@@ -197,10 +148,7 @@ static void remove_anon_migration_ptes(struct page *old, struct page *new)
  */
 static void remove_migration_ptes(struct page *old, struct page *new)
 {
-	if (PageAnon(new))
-		remove_anon_migration_ptes(old, new);
-	else
-		remove_file_migration_ptes(old, new);
+	rmap_walk(new, remove_migration_pte, old);
 }
 
 /*
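Note on the consolidation above: the two open-coded anon/file walkers are gone, and remove_migration_pte() is now a callback handed to rmap_walk(). A hedged sketch of the dispatch this relies on, approximating the rmap_walk() added to mm/rmap.c by this series (helper names assumed from that series; the callback runs for each vma mapping the page until it returns something other than SWAP_AGAIN):

/* Sketch only - approximates rmap_walk() in mm/rmap.c for this series */
int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
		struct vm_area_struct *, unsigned long, void *), void *arg)
{
	VM_BUG_ON(!PageLocked(page));

	if (unlikely(PageKsm(page)))
		return rmap_walk_ksm(page, rmap_one, arg);
	else if (PageAnon(page))
		return rmap_walk_anon(page, rmap_one, arg);
	else
		return rmap_walk_file(page, rmap_one, arg);
}

This is why the callback now takes the address as a parameter (the walker computes it per vma) and returns int rather than void.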
@@ -328,8 +276,6 @@ static int migrate_page_move_mapping(struct address_space *mapping,
  */
 static void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	int anon;
-
 	copy_highpage(newpage, page);
 
 	if (PageError(page))
@@ -341,8 +287,8 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 	if (TestClearPageActive(page)) {
 		VM_BUG_ON(PageUnevictable(page));
 		SetPageActive(newpage);
-	} else
-		unevictable_migrate_page(newpage, page);
+	} else if (TestClearPageUnevictable(page))
+		SetPageUnevictable(newpage);
 	if (PageChecked(page))
 		SetPageChecked(newpage);
 	if (PageMappedToDisk(page))
@@ -361,12 +307,11 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 	}
 
 	mlock_migrate_page(newpage, page);
+	ksm_migrate_page(newpage, page);
 
 	ClearPageSwapCache(page);
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
-	/* page->mapping contains a flag for PageAnon() */
-	anon = PageAnon(page);
 	page->mapping = NULL;
 
 	/*
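Since migrate_page_copy() hands the page's identity over to newpage, KSM must be told so its stable tree tracks the replacement. A hedged sketch of ksm_migrate_page(), approximating mm/ksm.c in this series (the stable_node field name is an assumption from that series):

/* Sketch only - conceptually, re-point the KSM stable-tree node at newpage */
void ksm_migrate_page(struct page *newpage, struct page *oldpage)
{
	struct stable_node *stable_node;

	VM_BUG_ON(!PageLocked(oldpage));
	VM_BUG_ON(!PageLocked(newpage));

	stable_node = page_stable_node(newpage);
	if (stable_node) {
		VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage));
		stable_node->kpfn = page_to_pfn(newpage);
	}
}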
@@ -580,9 +525,9 @@ static int move_to_new_page(struct page *newpage, struct page *page)
 	else
 		rc = fallback_migrate_page(mapping, newpage, page);
 
-	if (!rc) {
+	if (!rc)
 		remove_migration_ptes(page, newpage);
-	} else
+	else
 		newpage->mapping = NULL;
 
 	unlock_page(newpage);
@@ -595,7 +540,7 @@ static int move_to_new_page(struct page *newpage, struct page *page)
  * to the newly allocated page in newpage.
  */
 static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force)
+			struct page *page, int force, int offlining)
 {
 	int rc = 0;
 	int *result = NULL;
@@ -621,6 +566,20 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		lock_page(page);
 	}
 
+	/*
+	 * Only memory hotplug's offline_pages() caller has locked out KSM,
+	 * and can safely migrate a KSM page.  The other cases have skipped
+	 * PageKsm along with PageReserved - but it is only now when we have
+	 * the page lock that we can be certain it will not go KSM beneath us
+	 * (KSM will not upgrade a page from PageAnon to PageKsm when it sees
+	 * its pagecount raised, but only here do we take the page lock which
+	 * serializes that).
+	 */
+	if (PageKsm(page) && !offlining) {
+		rc = -EBUSY;
+		goto unlock;
+	}
+
 	/* charge against new page */
 	charge = mem_cgroup_prepare_migration(page, &mem);
 	if (charge == -ENOMEM) {
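For reference, the PageKsm() test used above is a mapping-flags check; a hedged sketch approximating include/linux/ksm.h in this series (a KSM page's ->mapping carries both the anon and KSM flag bits, so plain PageAnon pages do not match):

/* Sketch only - approximates include/linux/ksm.h in this series */
static inline int PageKsm(struct page *page)
{
	return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
				(PAGE_MAPPING_ANON | PAGE_MAPPING_KSM);
}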
@@ -737,7 +696,7 @@ move_newpage:
  * Return: Number of pages not migrated or error code.
  */
 int migrate_pages(struct list_head *from,
-		new_page_t get_new_page, unsigned long private)
+		new_page_t get_new_page, unsigned long private, int offlining)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -746,13 +705,6 @@ int migrate_pages(struct list_head *from,
 	struct page *page2;
 	int swapwrite = current->flags & PF_SWAPWRITE;
 	int rc;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	list_for_each_entry(page, from, lru)
-		__inc_zone_page_state(page, NR_ISOLATED_ANON +
-				page_is_file_cache(page));
-	local_irq_restore(flags);
 
 	if (!swapwrite)
 		current->flags |= PF_SWAPWRITE;
@@ -764,7 +716,7 @@ int migrate_pages(struct list_head *from,
 			cond_resched();
 
 			rc = unmap_and_move(get_new_page, private,
-						page, pass > 2);
+						page, pass > 2, offlining);
 
 			switch(rc) {
 			case -ENOMEM:
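To illustrate the new flag (hypothetical call-site fragments, not part of this diff): the memory hotplug offline path, which has locked out KSM, would pass offlining == 1; every other caller passes 0, as the sys_move_pages() path further down in this diff does:

/* Hypothetical sketch of callers of the new migrate_pages() signature;
 * the hotplug allocator's name is an assumption: */
ret = migrate_pages(&source, hotremove_migrate_alloc, 0, 1);
/* sys_move_pages() path (shown below in this diff) - offlining is 0: */
err = migrate_pages(&pagelist, new_page_node, (unsigned long)pm, 0);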
@@ -860,7 +812,8 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 		if (!page)
 			goto set_status;
 
-		if (PageReserved(page))		/* Check for zero page */
+		/* Use PageReserved to check for zero page */
+		if (PageReserved(page) || PageKsm(page))
 			goto put_and_set;
 
 		pp->page = page;
@@ -878,8 +831,11 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 			goto put_and_set;
 
 		err = isolate_lru_page(page);
-		if (!err)
+		if (!err) {
 			list_add_tail(&page->lru, &pagelist);
+			inc_zone_page_state(page, NR_ISOLATED_ANON +
+						page_is_file_cache(page));
+		}
 put_and_set:
 		/*
 		 * Either remove the duplicate refcount from
@@ -894,7 +850,7 @@ set_status:
 	err = 0;
 	if (!list_empty(&pagelist))
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm);
+				(unsigned long)pm, 0);
 
 	up_read(&mm->mmap_sem);
 	return err;
@@ -953,6 +909,9 @@ static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
 			goto out_pm;
 
 		err = -ENODEV;
+		if (node < 0 || node >= MAX_NUMNODES)
+			goto out_pm;
+
 		if (!node_state(node, N_HIGH_MEMORY))
 			goto out_pm;
 
@@ -1015,7 +974,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
 
 		err = -ENOENT;
 		/* Use PageReserved to check for zero page */
-		if (!page || PageReserved(page))
+		if (!page || PageReserved(page) || PageKsm(page))
 			goto set_status;
 
 		err = page_to_nid(page);
@@ -1040,33 +999,27 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
 #define DO_PAGES_STAT_CHUNK_NR 16
 	const void __user *chunk_pages[DO_PAGES_STAT_CHUNK_NR];
 	int chunk_status[DO_PAGES_STAT_CHUNK_NR];
-	unsigned long i, chunk_nr = DO_PAGES_STAT_CHUNK_NR;
-	int err;
 
-	for (i = 0; i < nr_pages; i += chunk_nr) {
-		if (chunk_nr + i > nr_pages)
-			chunk_nr = nr_pages - i;
+	while (nr_pages) {
+		unsigned long chunk_nr;
 
-		err = copy_from_user(chunk_pages, &pages[i],
-				     chunk_nr * sizeof(*chunk_pages));
-		if (err) {
-			err = -EFAULT;
-			goto out;
-		}
+		chunk_nr = nr_pages;
+		if (chunk_nr > DO_PAGES_STAT_CHUNK_NR)
+			chunk_nr = DO_PAGES_STAT_CHUNK_NR;
+
+		if (copy_from_user(chunk_pages, pages, chunk_nr * sizeof(*chunk_pages)))
+			break;
 
 		do_pages_stat_array(mm, chunk_nr, chunk_pages, chunk_status);
 
-		err = copy_to_user(&status[i], chunk_status,
-				   chunk_nr * sizeof(*chunk_status));
-		if (err) {
-			err = -EFAULT;
-			goto out;
-		}
-	}
-	err = 0;
+		if (copy_to_user(status, chunk_status, chunk_nr * sizeof(*status)))
+			break;
 
-out:
-	return err;
+		pages += chunk_nr;
+		status += chunk_nr;
+		nr_pages -= chunk_nr;
+	}
+	return nr_pages ? -EFAULT : 0;
 }
 
 /*
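The chunked status loop above is what move_pages(2) hits when its nodes argument is NULL. A minimal userspace sketch, assuming libnuma's move_pages() wrapper (link with -lnuma), that queries which node each page of a buffer lives on:

#include <numaif.h>	/* move_pages() */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	char *buf;
	void *pages[2];
	int status[2];

	if (posix_memalign((void **)&buf, pagesize, 2 * pagesize))
		return 1;
	buf[0] = buf[pagesize] = 1;	/* touch both pages so they are mapped */
	pages[0] = buf;
	pages[1] = buf + pagesize;

	/* nodes == NULL: do not move anything, just report each page's node;
	 * the kernel answers via do_pages_stat() in 16-entry chunks */
	if (move_pages(0 /* self */, 2, pages, NULL, status, 0) == 0)
		printf("page 0 on node %d, page 1 on node %d\n",
		       status[0], status[1]);
	return 0;
}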
