Diffstat (limited to 'mm/migrate.c')
-rw-r--r--  mm/migrate.c  123
1 file changed, 64 insertions, 59 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 6ae8a66a7045..46fe8cc13d67 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -113,6 +113,8 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 		goto out;
 
 	pmd = pmd_offset(pud, addr);
+	if (pmd_trans_huge(*pmd))
+		goto out;
 	if (!pmd_present(*pmd))
 		goto out;
 
@@ -246,7 +248,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 
 	expected_count = 2 + page_has_private(page);
 	if (page_count(page) != expected_count ||
-		(struct page *)radix_tree_deref_slot(pslot) != page) {
+		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
 		spin_unlock_irq(&mapping->tree_lock);
 		return -EAGAIN;
 	}
@@ -318,7 +320,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 	expected_count = 2 + page_has_private(page);
 	if (page_count(page) != expected_count ||
-		(struct page *)radix_tree_deref_slot(pslot) != page) {
+		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
 		spin_unlock_irq(&mapping->tree_lock);
 		return -EAGAIN;
 	}
@@ -614,13 +616,12 @@ static int move_to_new_page(struct page *newpage, struct page *page,
  * to the newly allocated page in newpage.
  */
 static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force, int offlining)
+			struct page *page, int force, bool offlining, bool sync)
 {
 	int rc = 0;
 	int *result = NULL;
 	struct page *newpage = get_new_page(page, private, &result);
 	int remap_swapcache = 1;
-	int rcu_locked = 0;
 	int charge = 0;
 	struct mem_cgroup *mem = NULL;
 	struct anon_vma *anon_vma = NULL;
@@ -632,6 +633,9 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		/* page was freed from under us. So we are done. */
 		goto move_newpage;
 	}
+	if (unlikely(PageTransHuge(page)))
+		if (unlikely(split_huge_page(page)))
+			goto move_newpage;
 
 	/* prepare cgroup just returns 0 or -ENOMEM */
 	rc = -EAGAIN;
@@ -639,6 +643,23 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	if (!trylock_page(page)) {
 		if (!force)
 			goto move_newpage;
+
+		/*
+		 * It's not safe for direct compaction to call lock_page.
+		 * For example, during page readahead pages are added locked
+		 * to the LRU. Later, when the IO completes the pages are
+		 * marked uptodate and unlocked. However, the queueing
+		 * could be merging multiple pages for one bio (e.g.
+		 * mpage_readpages). If an allocation happens for the
+		 * second or third page, the process can end up locking
+		 * the same page twice and deadlocking. Rather than
+		 * trying to be clever about what pages can be locked,
+		 * avoid the use of lock_page for direct compaction
+		 * altogether.
+		 */
+		if (current->flags & PF_MEMALLOC)
+			goto move_newpage;
+
 		lock_page(page);
 	}
 
@@ -665,27 +686,33 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	BUG_ON(charge);
 
 	if (PageWriteback(page)) {
-		if (!force)
+		if (!force || !sync)
 			goto uncharge;
 		wait_on_page_writeback(page);
 	}
 	/*
 	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
 	 * we cannot notice that anon_vma is freed while we migrates a page.
-	 * This rcu_read_lock() delays freeing anon_vma pointer until the end
+	 * This get_anon_vma() delays freeing anon_vma pointer until the end
 	 * of migration. File cache pages are no problem because of page_lock()
 	 * File Caches may use write_page() or lock_page() in migration, then,
 	 * just care Anon page here.
 	 */
 	if (PageAnon(page)) {
-		rcu_read_lock();
-		rcu_locked = 1;
-
-		/* Determine how to safely use anon_vma */
-		if (!page_mapped(page)) {
-			if (!PageSwapCache(page))
-				goto rcu_unlock;
-
+		/*
+		 * Only page_lock_anon_vma() understands the subtleties of
+		 * getting a hold on an anon_vma from outside one of its mms.
+		 */
+		anon_vma = page_lock_anon_vma(page);
+		if (anon_vma) {
+			/*
+			 * Take a reference count on the anon_vma if the
+			 * page is mapped so that it is guaranteed to
+			 * exist when the page is remapped later
+			 */
+			get_anon_vma(anon_vma);
+			page_unlock_anon_vma(anon_vma);
+		} else if (PageSwapCache(page)) {
 			/*
 			 * We cannot be sure that the anon_vma of an unmapped
 			 * swapcache page is safe to use because we don't
@@ -700,13 +727,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 			 */
 			remap_swapcache = 0;
 		} else {
-			/*
-			 * Take a reference count on the anon_vma if the
-			 * page is mapped so that it is guaranteed to
-			 * exist when the page is remapped later
-			 */
-			anon_vma = page_anon_vma(page);
-			get_anon_vma(anon_vma);
+			goto uncharge;
 		}
 	}
 
@@ -723,16 +744,10 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	 * free the metadata, so the page can be freed.
 	 */
 	if (!page->mapping) {
-		if (!PageAnon(page) && page_has_private(page)) {
-			/*
-			 * Go direct to try_to_free_buffers() here because
-			 * a) that's what try_to_release_page() would do anyway
-			 * b) we may be under rcu_read_lock() here, so we can't
-			 *    use GFP_KERNEL which is what try_to_release_page()
-			 *    needs to be effective.
-			 */
+		VM_BUG_ON(PageAnon(page));
+		if (page_has_private(page)) {
 			try_to_free_buffers(page);
-			goto rcu_unlock;
+			goto uncharge;
 		}
 		goto skip_unmap;
 	}
@@ -746,17 +761,14 @@ skip_unmap:
 
 	if (rc && remap_swapcache)
 		remove_migration_ptes(page, page);
-rcu_unlock:
 
 	/* Drop an anon_vma reference if we took one */
 	if (anon_vma)
 		drop_anon_vma(anon_vma);
 
-	if (rcu_locked)
-		rcu_read_unlock();
 uncharge:
 	if (!charge)
-		mem_cgroup_end_migration(mem, page, newpage);
+		mem_cgroup_end_migration(mem, page, newpage, rc == 0);
 unlock:
 	unlock_page(page);
 
@@ -810,12 +822,11 @@ move_newpage:
  */
 static int unmap_and_move_huge_page(new_page_t get_new_page,
 				unsigned long private, struct page *hpage,
-				int force, int offlining)
+				int force, bool offlining, bool sync)
 {
 	int rc = 0;
 	int *result = NULL;
 	struct page *new_hpage = get_new_page(hpage, private, &result);
-	int rcu_locked = 0;
 	struct anon_vma *anon_vma = NULL;
 
 	if (!new_hpage)
@@ -824,18 +835,16 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	rc = -EAGAIN;
 
 	if (!trylock_page(hpage)) {
-		if (!force)
+		if (!force || !sync)
 			goto out;
 		lock_page(hpage);
 	}
 
 	if (PageAnon(hpage)) {
-		rcu_read_lock();
-		rcu_locked = 1;
-
-		if (page_mapped(hpage)) {
-			anon_vma = page_anon_vma(hpage);
-			atomic_inc(&anon_vma->external_refcount);
+		anon_vma = page_lock_anon_vma(hpage);
+		if (anon_vma) {
+			get_anon_vma(anon_vma);
+			page_unlock_anon_vma(anon_vma);
 		}
 	}
 
@@ -847,16 +856,8 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	if (rc)
 		remove_migration_ptes(hpage, hpage);
 
-	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount,
-					&anon_vma->lock)) {
-		int empty = list_empty(&anon_vma->head);
-		spin_unlock(&anon_vma->lock);
-		if (empty)
-			anon_vma_free(anon_vma);
-	}
-
-	if (rcu_locked)
-		rcu_read_unlock();
+	if (anon_vma)
+		drop_anon_vma(anon_vma);
 out:
 	unlock_page(hpage);
 
@@ -892,7 +893,8 @@ out:
  * Return: Number of pages not migrated or error code.
  */
 int migrate_pages(struct list_head *from,
-		new_page_t get_new_page, unsigned long private, int offlining)
+		new_page_t get_new_page, unsigned long private, bool offlining,
+		bool sync)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -912,7 +914,8 @@ int migrate_pages(struct list_head *from,
 			cond_resched();
 
 			rc = unmap_and_move(get_new_page, private,
-						page, pass > 2, offlining);
+						page, pass > 2, offlining,
+						sync);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -941,7 +944,8 @@ out:
 }
 
 int migrate_huge_pages(struct list_head *from,
-		new_page_t get_new_page, unsigned long private, int offlining)
+		new_page_t get_new_page, unsigned long private, bool offlining,
+		bool sync)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -957,7 +961,8 @@ int migrate_huge_pages(struct list_head *from,
 			cond_resched();
 
 			rc = unmap_and_move_huge_page(get_new_page,
-					private, page, pass > 2, offlining);
+					private, page, pass > 2, offlining,
+					sync);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -1042,7 +1047,7 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 		if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
 			goto set_status;
 
-		page = follow_page(vma, pp->addr, FOLL_GET);
+		page = follow_page(vma, pp->addr, FOLL_GET|FOLL_SPLIT);
 
 		err = PTR_ERR(page);
 		if (IS_ERR(page))
@@ -1090,7 +1095,7 @@ set_status:
 	err = 0;
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm, 0);
+				(unsigned long)pm, 0, true);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}