Diffstat (limited to 'mm/migrate.c')
 mm/migrate.c | 378 ++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 294 insertions(+), 84 deletions(-)
diff --git a/mm/migrate.c b/mm/migrate.c
index 38e7cad782f4..666e4e677414 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -32,8 +32,11 @@
 #include <linux/security.h>
 #include <linux/memcontrol.h>
 #include <linux/syscalls.h>
+#include <linux/hugetlb.h>
 #include <linux/gfp.h>
 
+#include <asm/tlbflush.h>
+
 #include "internal.h"
 
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
@@ -95,26 +98,36 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte_t *ptep, pte;
 	spinlock_t *ptl;
 
-	pgd = pgd_offset(mm, addr);
-	if (!pgd_present(*pgd))
-		goto out;
+	if (unlikely(PageHuge(new))) {
+		ptep = huge_pte_offset(mm, addr);
+		if (!ptep)
+			goto out;
+		ptl = &mm->page_table_lock;
+	} else {
+		pgd = pgd_offset(mm, addr);
+		if (!pgd_present(*pgd))
+			goto out;
 
-	pud = pud_offset(pgd, addr);
-	if (!pud_present(*pud))
-		goto out;
+		pud = pud_offset(pgd, addr);
+		if (!pud_present(*pud))
+			goto out;
 
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
-		goto out;
+		pmd = pmd_offset(pud, addr);
+		if (pmd_trans_huge(*pmd))
+			goto out;
+		if (!pmd_present(*pmd))
+			goto out;
 
-	ptep = pte_offset_map(pmd, addr);
+		ptep = pte_offset_map(pmd, addr);
 
-	if (!is_swap_pte(*ptep)) {
-		pte_unmap(ptep);
-		goto out;
-	}
+		if (!is_swap_pte(*ptep)) {
+			pte_unmap(ptep);
+			goto out;
+		}
+
+		ptl = pte_lockptr(mm, pmd);
+	}
 
-	ptl = pte_lockptr(mm, pmd);
 	spin_lock(ptl);
 	pte = *ptep;
 	if (!is_swap_pte(pte))
@@ -130,10 +143,19 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 	pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
 	if (is_write_migration_entry(entry))
 		pte = pte_mkwrite(pte);
+#ifdef CONFIG_HUGETLB_PAGE
+	if (PageHuge(new))
+		pte = pte_mkhuge(pte);
+#endif
 	flush_cache_page(vma, addr, pte_pfn(pte));
 	set_pte_at(mm, addr, ptep, pte);
 
-	if (PageAnon(new))
+	if (PageHuge(new)) {
+		if (PageAnon(new))
+			hugepage_add_anon_rmap(new, vma, addr);
+		else
+			page_dup_rmap(new);
+	} else if (PageAnon(new))
 		page_add_anon_rmap(new, vma, addr);
 	else
 		page_add_file_rmap(new);
@@ -226,7 +248,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 
 	expected_count = 2 + page_has_private(page);
 	if (page_count(page) != expected_count ||
-			(struct page *)radix_tree_deref_slot(pslot) != page) {
+			radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
 		spin_unlock_irq(&mapping->tree_lock);
 		return -EAGAIN;
 	}
@@ -266,7 +288,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 	 */
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	__inc_zone_page_state(newpage, NR_FILE_PAGES);
-	if (PageSwapBacked(page)) {
+	if (!PageSwapCache(page) && PageSwapBacked(page)) {
 		__dec_zone_page_state(page, NR_SHMEM);
 		__inc_zone_page_state(newpage, NR_SHMEM);
 	}
@@ -276,11 +298,59 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 }
 
 /*
+ * The expected number of remaining references is the same as that
+ * of migrate_page_move_mapping().
+ */
+int migrate_huge_page_move_mapping(struct address_space *mapping,
+				   struct page *newpage, struct page *page)
+{
+	int expected_count;
+	void **pslot;
+
+	if (!mapping) {
+		if (page_count(page) != 1)
+			return -EAGAIN;
+		return 0;
+	}
+
+	spin_lock_irq(&mapping->tree_lock);
+
+	pslot = radix_tree_lookup_slot(&mapping->page_tree,
+					page_index(page));
+
+	expected_count = 2 + page_has_private(page);
+	if (page_count(page) != expected_count ||
+		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	if (!page_freeze_refs(page, expected_count)) {
+		spin_unlock_irq(&mapping->tree_lock);
+		return -EAGAIN;
+	}
+
+	get_page(newpage);
+
+	radix_tree_replace_slot(pslot, newpage);
+
+	page_unfreeze_refs(page, expected_count);
+
+	__put_page(page);
+
+	spin_unlock_irq(&mapping->tree_lock);
+	return 0;
+}
+
+/*
  * Copy the page to its new location
  */
-static void migrate_page_copy(struct page *newpage, struct page *page)
+void migrate_page_copy(struct page *newpage, struct page *page)
 {
-	copy_highpage(newpage, page);
+	if (PageHuge(page))
+		copy_huge_page(newpage, page);
+	else
+		copy_highpage(newpage, page);
 
 	if (PageError(page))
 		SetPageError(newpage);
@@ -305,7 +375,7 @@ static void migrate_page_copy(struct page *newpage, struct page *page)
 		 * redo the accounting that clear_page_dirty_for_io undid,
 		 * but we can't use set_page_dirty because that function
 		 * is actually a signal that all of the page has become dirty.
-		 * Wheras only part of our page may be dirty.
+		 * Whereas only part of our page may be dirty.
 		 */
 		__set_page_dirty_nobuffers(newpage);
 	}
@@ -431,7 +501,6 @@ static int writeout(struct address_space *mapping, struct page *page)
 		.nr_to_write = 1,
 		.range_start = 0,
 		.range_end = LLONG_MAX,
-		.nonblocking = 1,
 		.for_reclaim = 1
 	};
 	int rc;
@@ -495,7 +564,7 @@ static int fallback_migrate_page(struct address_space *mapping,
  *  == 0 - success
  */
 static int move_to_new_page(struct page *newpage, struct page *page,
-					int remap_swapcache)
+				int remap_swapcache, bool sync)
 {
 	struct address_space *mapping;
 	int rc;
@@ -517,18 +586,28 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 	mapping = page_mapping(page);
 	if (!mapping)
 		rc = migrate_page(mapping, newpage, page);
-	else if (mapping->a_ops->migratepage)
-		/*
-		 * Most pages have a mapping and most filesystems
-		 * should provide a migration function. Anonymous
-		 * pages are part of swap space which also has its
-		 * own migration function. This is the most common
-		 * path for page migration.
-		 */
-		rc = mapping->a_ops->migratepage(mapping,
-						newpage, page);
-	else
-		rc = fallback_migrate_page(mapping, newpage, page);
+	else {
+		/*
+		 * Do not writeback pages if !sync and migratepage is
+		 * not pointing to migrate_page() which is nonblocking
+		 * (swapcache/tmpfs uses migratepage = migrate_page).
+		 */
+		if (PageDirty(page) && !sync &&
+		    mapping->a_ops->migratepage != migrate_page)
+			rc = -EBUSY;
+		else if (mapping->a_ops->migratepage)
+			/*
+			 * Most pages have a mapping and most filesystems
+			 * should provide a migration function. Anonymous
+			 * pages are part of swap space which also has its
+			 * own migration function. This is the most common
+			 * path for page migration.
+			 */
+			rc = mapping->a_ops->migratepage(mapping,
+							newpage, page);
+		else
+			rc = fallback_migrate_page(mapping, newpage, page);
+	}
 
 	if (rc) {
 		newpage->mapping = NULL;
@@ -547,15 +626,14 @@ static int move_to_new_page(struct page *newpage, struct page *page,
  * to the newly allocated page in newpage.
  */
 static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force, int offlining)
+			struct page *page, int force, bool offlining, bool sync)
 {
 	int rc = 0;
 	int *result = NULL;
 	struct page *newpage = get_new_page(page, private, &result);
 	int remap_swapcache = 1;
-	int rcu_locked = 0;
 	int charge = 0;
-	struct mem_cgroup *mem = NULL;
+	struct mem_cgroup *mem;
 	struct anon_vma *anon_vma = NULL;
 
 	if (!newpage)
@@ -565,13 +643,33 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		/* page was freed from under us. So we are done. */
 		goto move_newpage;
 	}
+	if (unlikely(PageTransHuge(page)))
+		if (unlikely(split_huge_page(page)))
+			goto move_newpage;
 
 	/* prepare cgroup just returns 0 or -ENOMEM */
 	rc = -EAGAIN;
 
 	if (!trylock_page(page)) {
-		if (!force)
+		if (!force || !sync)
+			goto move_newpage;
+
+		/*
+		 * It's not safe for direct compaction to call lock_page.
+		 * For example, during page readahead pages are added locked
+		 * to the LRU. Later, when the IO completes the pages are
+		 * marked uptodate and unlocked. However, the queueing
+		 * could be merging multiple pages for one bio (e.g.
+		 * mpage_readpages). If an allocation happens for the
+		 * second or third page, the process can end up locking
+		 * the same page twice and deadlocking. Rather than
+		 * trying to be clever about what pages can be locked,
+		 * avoid the use of lock_page for direct compaction
+		 * altogether.
+		 */
+		if (current->flags & PF_MEMALLOC)
 			goto move_newpage;
+
 		lock_page(page);
 	}
 
@@ -590,7 +688,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	}
 
 	/* charge against new page */
-	charge = mem_cgroup_prepare_migration(page, newpage, &mem);
+	charge = mem_cgroup_prepare_migration(page, newpage, &mem, GFP_KERNEL);
 	if (charge == -ENOMEM) {
 		rc = -ENOMEM;
 		goto unlock;
@@ -598,6 +696,14 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	BUG_ON(charge);
 
 	if (PageWriteback(page)) {
+		/*
+		 * For !sync, there is no point retrying as the retry loop
+		 * is expected to be too short for PageWriteback to be cleared
+		 */
+		if (!sync) {
+			rc = -EBUSY;
+			goto uncharge;
+		}
 		if (!force)
 			goto uncharge;
 		wait_on_page_writeback(page);
@@ -605,20 +711,22 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	/*
 	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
 	 * we cannot notice that anon_vma is freed while we migrates a page.
-	 * This rcu_read_lock() delays freeing anon_vma pointer until the end
+	 * This get_anon_vma() delays freeing anon_vma pointer until the end
 	 * of migration. File cache pages are no problem because of page_lock()
 	 * File Caches may use write_page() or lock_page() in migration, then,
 	 * just care Anon page here.
 	 */
 	if (PageAnon(page)) {
-		rcu_read_lock();
-		rcu_locked = 1;
-
-		/* Determine how to safely use anon_vma */
-		if (!page_mapped(page)) {
-			if (!PageSwapCache(page))
-				goto rcu_unlock;
-
+		/*
+		 * Only page_lock_anon_vma() understands the subtleties of
+		 * getting a hold on an anon_vma from outside one of its mms.
+		 */
+		anon_vma = page_get_anon_vma(page);
+		if (anon_vma) {
+			/*
+			 * Anon page
+			 */
+		} else if (PageSwapCache(page)) {
 			/*
 			 * We cannot be sure that the anon_vma of an unmapped
 			 * swapcache page is safe to use because we don't
@@ -633,13 +741,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 			 */
 			remap_swapcache = 0;
 		} else {
-			/*
-			 * Take a reference count on the anon_vma if the
-			 * page is mapped so that it is guaranteed to
-			 * exist when the page is remapped later
-			 */
-			anon_vma = page_anon_vma(page);
-			get_anon_vma(anon_vma);
+			goto uncharge;
 		}
 	}
 
@@ -656,16 +758,10 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	 * free the metadata, so the page can be freed.
 	 */
 	if (!page->mapping) {
-		if (!PageAnon(page) && page_has_private(page)) {
-			/*
-			 * Go direct to try_to_free_buffers() here because
-			 * a) that's what try_to_release_page() would do anyway
-			 * b) we may be under rcu_read_lock() here, so we can't
-			 *	use GFP_KERNEL which is what try_to_release_page()
-			 *	needs to be effective.
-			 */
+		VM_BUG_ON(PageAnon(page));
+		if (page_has_private(page)) {
 			try_to_free_buffers(page);
-			goto rcu_unlock;
+			goto uncharge;
 		}
 		goto skip_unmap;
 	}
@@ -675,24 +771,22 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 
 skip_unmap:
 	if (!page_mapped(page))
-		rc = move_to_new_page(newpage, page, remap_swapcache);
+		rc = move_to_new_page(newpage, page, remap_swapcache, sync);
 
 	if (rc && remap_swapcache)
 		remove_migration_ptes(page, page);
-rcu_unlock:
 
 	/* Drop an anon_vma reference if we took one */
 	if (anon_vma)
-		drop_anon_vma(anon_vma);
+		put_anon_vma(anon_vma);
 
-	if (rcu_locked)
-		rcu_read_unlock();
 uncharge:
 	if (!charge)
-		mem_cgroup_end_migration(mem, page, newpage);
+		mem_cgroup_end_migration(mem, page, newpage, rc == 0);
 unlock:
 	unlock_page(page);
 
+move_newpage:
 	if (rc != -EAGAIN) {
 		/*
 		 * A page that has been migrated has all references
@@ -706,8 +800,6 @@ unlock:
 		putback_lru_page(page);
 	}
 
-move_newpage:
-
 	/*
 	 * Move the new page to the LRU. If migration was not successful
 	 * then this will free the page.
@@ -724,6 +816,76 @@ move_newpage:
 }
 
 /*
+ * Counterpart of unmap_and_move_page() for hugepage migration.
+ *
+ * This function doesn't wait the completion of hugepage I/O
+ * because there is no race between I/O and migration for hugepage.
+ * Note that currently hugepage I/O occurs only in direct I/O
+ * where no lock is held and PG_writeback is irrelevant,
+ * and writeback status of all subpages are counted in the reference
+ * count of the head page (i.e. if all subpages of a 2MB hugepage are
+ * under direct I/O, the reference of the head page is 512 and a bit more.)
+ * This means that when we try to migrate hugepage whose subpages are
+ * doing direct I/O, some references remain after try_to_unmap() and
+ * hugepage migration fails without data corruption.
+ *
+ * There is also no race when direct I/O is issued on the page under migration,
+ * because then pte is replaced with migration swap entry and direct I/O code
+ * will wait in the page fault for migration to complete.
+ */
+static int unmap_and_move_huge_page(new_page_t get_new_page,
+				unsigned long private, struct page *hpage,
+				int force, bool offlining, bool sync)
+{
+	int rc = 0;
+	int *result = NULL;
+	struct page *new_hpage = get_new_page(hpage, private, &result);
+	struct anon_vma *anon_vma = NULL;
+
+	if (!new_hpage)
+		return -ENOMEM;
+
+	rc = -EAGAIN;
+
+	if (!trylock_page(hpage)) {
+		if (!force || !sync)
+			goto out;
+		lock_page(hpage);
+	}
+
+	if (PageAnon(hpage))
+		anon_vma = page_get_anon_vma(hpage);
+
+	try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+
+	if (!page_mapped(hpage))
+		rc = move_to_new_page(new_hpage, hpage, 1, sync);
+
+	if (rc)
+		remove_migration_ptes(hpage, hpage);
+
+	if (anon_vma)
+		put_anon_vma(anon_vma);
+out:
+	unlock_page(hpage);
+
+	if (rc != -EAGAIN) {
+		list_del(&hpage->lru);
+		put_page(hpage);
+	}
+
+	put_page(new_hpage);
+
+	if (result) {
+		if (rc)
+			*result = rc;
+		else
+			*result = page_to_nid(new_hpage);
+	}
+	return rc;
+}
+
+/*
  * migrate_pages
  *
  * The function takes one list of pages to migrate and a function
@@ -732,13 +894,15 @@ move_newpage:
  *
  * The function returns after 10 attempts or if no pages
  * are movable anymore because to has become empty
- * or no retryable pages exist anymore. All pages will be
- * returned to the LRU or freed.
+ * or no retryable pages exist anymore.
+ * Caller should call putback_lru_pages to return pages to the LRU
+ * or free list only if ret != 0.
  *
  * Return: Number of pages not migrated or error code.
  */
 int migrate_pages(struct list_head *from,
-		new_page_t get_new_page, unsigned long private, int offlining)
+		new_page_t get_new_page, unsigned long private, bool offlining,
+		bool sync)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -758,7 +922,8 @@ int migrate_pages(struct list_head *from,
 			cond_resched();
 
 			rc = unmap_and_move(get_new_page, private,
-						page, pass > 2, offlining);
+						page, pass > 2, offlining,
+						sync);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -780,8 +945,50 @@ out:
 	if (!swapwrite)
 		current->flags &= ~PF_SWAPWRITE;
 
-	putback_lru_pages(from);
+	if (rc)
+		return rc;
+
+	return nr_failed + retry;
+}
+
+int migrate_huge_pages(struct list_head *from,
+		new_page_t get_new_page, unsigned long private, bool offlining,
+		bool sync)
+{
+	int retry = 1;
+	int nr_failed = 0;
+	int pass = 0;
+	struct page *page;
+	struct page *page2;
+	int rc;
+
+	for (pass = 0; pass < 10 && retry; pass++) {
+		retry = 0;
+
+		list_for_each_entry_safe(page, page2, from, lru) {
+			cond_resched();
+
+			rc = unmap_and_move_huge_page(get_new_page,
+					private, page, pass > 2, offlining,
+					sync);
 
+			switch(rc) {
+			case -ENOMEM:
+				goto out;
+			case -EAGAIN:
+				retry++;
+				break;
+			case 0:
+				break;
+			default:
+				/* Permanent failure */
+				nr_failed++;
+				break;
+			}
+		}
+	}
+	rc = 0;
+out:
 	if (rc)
 		return rc;
 
@@ -841,10 +1048,10 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 
 		err = -EFAULT;
 		vma = find_vma(mm, pp->addr);
-		if (!vma || !vma_migratable(vma))
+		if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
 			goto set_status;
 
-		page = follow_page(vma, pp->addr, FOLL_GET);
+		page = follow_page(vma, pp->addr, FOLL_GET|FOLL_SPLIT);
 
 		err = PTR_ERR(page);
 		if (IS_ERR(page))
@@ -890,9 +1097,12 @@ set_status:
 	}
 
 	err = 0;
-	if (!list_empty(&pagelist))
+	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm, 0);
+				(unsigned long)pm, 0, true);
+		if (err)
+			putback_lru_pages(&pagelist);
+	}
 
 	up_read(&mm->mmap_sem);
 	return err;
@@ -1005,7 +1215,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
 		int err = -EFAULT;
 
 		vma = find_vma(mm, addr);
-		if (!vma)
+		if (!vma || addr < vma->vm_start)
 			goto set_status;
 
 		page = follow_page(vma, addr, 0);
@@ -1086,14 +1296,14 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
 		return -EPERM;
 
 	/* Find the mm_struct */
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	task = pid ? find_task_by_vpid(pid) : current;
 	if (!task) {
-		read_unlock(&tasklist_lock);
+		rcu_read_unlock();
 		return -ESRCH;
 	}
 	mm = get_task_mm(task);
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 	if (!mm)
 		return -EINVAL;