Diffstat (limited to 'mm/migrate.c')
-rw-r--r--  mm/migrate.c | 123
1 file changed, 64 insertions, 59 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 6ae8a66a7045..46fe8cc13d67 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -113,6 +113,8 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
 		goto out;
 
 	pmd = pmd_offset(pud, addr);
+	if (pmd_trans_huge(*pmd))
+		goto out;
 	if (!pmd_present(*pmd))
 		goto out;
 
@@ -246,7 +248,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
 
 	expected_count = 2 + page_has_private(page);
 	if (page_count(page) != expected_count ||
-		(struct page *)radix_tree_deref_slot(pslot) != page) {
+		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
 		spin_unlock_irq(&mapping->tree_lock);
 		return -EAGAIN;
 	}
@@ -318,7 +320,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 	expected_count = 2 + page_has_private(page);
 	if (page_count(page) != expected_count ||
-		(struct page *)radix_tree_deref_slot(pslot) != page) {
+		radix_tree_deref_slot_protected(pslot, &mapping->tree_lock) != page) {
 		spin_unlock_irq(&mapping->tree_lock);
 		return -EAGAIN;
 	}
@@ -614,13 +616,12 @@ static int move_to_new_page(struct page *newpage, struct page *page,
  * to the newly allocated page in newpage.
  */
 static int unmap_and_move(new_page_t get_new_page, unsigned long private,
-			struct page *page, int force, int offlining)
+			struct page *page, int force, bool offlining, bool sync)
 {
 	int rc = 0;
 	int *result = NULL;
 	struct page *newpage = get_new_page(page, private, &result);
 	int remap_swapcache = 1;
-	int rcu_locked = 0;
 	int charge = 0;
 	struct mem_cgroup *mem = NULL;
 	struct anon_vma *anon_vma = NULL;
@@ -632,6 +633,9 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 		/* page was freed from under us. So we are done. */
 		goto move_newpage;
 	}
+	if (unlikely(PageTransHuge(page)))
+		if (unlikely(split_huge_page(page)))
+			goto move_newpage;
 
 	/* prepare cgroup just returns 0 or -ENOMEM */
 	rc = -EAGAIN;
@@ -639,6 +643,23 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	if (!trylock_page(page)) {
 		if (!force)
 			goto move_newpage;
+
+		/*
+		 * It's not safe for direct compaction to call lock_page.
+		 * For example, during page readahead pages are added locked
+		 * to the LRU. Later, when the IO completes the pages are
+		 * marked uptodate and unlocked. However, the queueing
+		 * could be merging multiple pages for one bio (e.g.
+		 * mpage_readpages). If an allocation happens for the
+		 * second or third page, the process can end up locking
+		 * the same page twice and deadlocking. Rather than
+		 * trying to be clever about what pages can be locked,
+		 * avoid the use of lock_page for direct compaction
+		 * altogether.
+		 */
+		if (current->flags & PF_MEMALLOC)
+			goto move_newpage;
+
 		lock_page(page);
 	}
 
@@ -665,27 +686,33 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	BUG_ON(charge);
 
 	if (PageWriteback(page)) {
-		if (!force)
+		if (!force || !sync)
 			goto uncharge;
 		wait_on_page_writeback(page);
 	}
 	/*
 	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
 	 * we cannot notice that anon_vma is freed while we migrates a page.
-	 * This rcu_read_lock() delays freeing anon_vma pointer until the end
+	 * This get_anon_vma() delays freeing anon_vma pointer until the end
 	 * of migration. File cache pages are no problem because of page_lock()
 	 * File Caches may use write_page() or lock_page() in migration, then,
 	 * just care Anon page here.
 	 */
 	if (PageAnon(page)) {
-		rcu_read_lock();
-		rcu_locked = 1;
-
-		/* Determine how to safely use anon_vma */
-		if (!page_mapped(page)) {
-			if (!PageSwapCache(page))
-				goto rcu_unlock;
-
+		/*
+		 * Only page_lock_anon_vma() understands the subtleties of
+		 * getting a hold on an anon_vma from outside one of its mms.
+		 */
+		anon_vma = page_lock_anon_vma(page);
+		if (anon_vma) {
+			/*
+			 * Take a reference count on the anon_vma if the
+			 * page is mapped so that it is guaranteed to
+			 * exist when the page is remapped later
+			 */
+			get_anon_vma(anon_vma);
+			page_unlock_anon_vma(anon_vma);
+		} else if (PageSwapCache(page)) {
 			/*
 			 * We cannot be sure that the anon_vma of an unmapped
 			 * swapcache page is safe to use because we don't
@@ -700,13 +727,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 			 */
 			remap_swapcache = 0;
 		} else {
-			/*
-			 * Take a reference count on the anon_vma if the
-			 * page is mapped so that it is guaranteed to
-			 * exist when the page is remapped later
-			 */
-			anon_vma = page_anon_vma(page);
-			get_anon_vma(anon_vma);
+			goto uncharge;
 		}
 	}
 
@@ -723,16 +744,10 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	 * free the metadata, so the page can be freed.
 	 */
 	if (!page->mapping) {
-		if (!PageAnon(page) && page_has_private(page)) {
-			/*
-			 * Go direct to try_to_free_buffers() here because
-			 * a) that's what try_to_release_page() would do anyway
-			 * b) we may be under rcu_read_lock() here, so we can't
-			 * use GFP_KERNEL which is what try_to_release_page()
-			 * needs to be effective.
-			 */
+		VM_BUG_ON(PageAnon(page));
+		if (page_has_private(page)) {
 			try_to_free_buffers(page);
-			goto rcu_unlock;
+			goto uncharge;
 		}
 		goto skip_unmap;
 	}
@@ -746,17 +761,14 @@ skip_unmap:
 
 	if (rc && remap_swapcache)
 		remove_migration_ptes(page, page);
-rcu_unlock:
 
 	/* Drop an anon_vma reference if we took one */
 	if (anon_vma)
 		drop_anon_vma(anon_vma);
 
-	if (rcu_locked)
-		rcu_read_unlock();
 uncharge:
 	if (!charge)
-		mem_cgroup_end_migration(mem, page, newpage);
+		mem_cgroup_end_migration(mem, page, newpage, rc == 0);
 unlock:
 	unlock_page(page);
 
@@ -810,12 +822,11 @@ move_newpage:
  */
 static int unmap_and_move_huge_page(new_page_t get_new_page,
 				unsigned long private, struct page *hpage,
-				int force, int offlining)
+				int force, bool offlining, bool sync)
 {
 	int rc = 0;
 	int *result = NULL;
 	struct page *new_hpage = get_new_page(hpage, private, &result);
-	int rcu_locked = 0;
 	struct anon_vma *anon_vma = NULL;
 
 	if (!new_hpage)
@@ -824,18 +835,16 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	rc = -EAGAIN;
 
 	if (!trylock_page(hpage)) {
-		if (!force)
+		if (!force || !sync)
 			goto out;
 		lock_page(hpage);
 	}
 
 	if (PageAnon(hpage)) {
-		rcu_read_lock();
-		rcu_locked = 1;
-
-		if (page_mapped(hpage)) {
-			anon_vma = page_anon_vma(hpage);
-			atomic_inc(&anon_vma->external_refcount);
+		anon_vma = page_lock_anon_vma(hpage);
+		if (anon_vma) {
+			get_anon_vma(anon_vma);
+			page_unlock_anon_vma(anon_vma);
 		}
 	}
 
@@ -847,16 +856,8 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	if (rc)
 		remove_migration_ptes(hpage, hpage);
 
-	if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount,
-					    &anon_vma->lock)) {
-		int empty = list_empty(&anon_vma->head);
-		spin_unlock(&anon_vma->lock);
-		if (empty)
-			anon_vma_free(anon_vma);
-	}
-
-	if (rcu_locked)
-		rcu_read_unlock();
+	if (anon_vma)
+		drop_anon_vma(anon_vma);
 out:
 	unlock_page(hpage);
 
@@ -892,7 +893,8 @@ out:
  * Return: Number of pages not migrated or error code.
  */
 int migrate_pages(struct list_head *from,
-		new_page_t get_new_page, unsigned long private, int offlining)
+		new_page_t get_new_page, unsigned long private, bool offlining,
+		bool sync)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -912,7 +914,8 @@ int migrate_pages(struct list_head *from,
 			cond_resched();
 
 			rc = unmap_and_move(get_new_page, private,
-						page, pass > 2, offlining);
+						page, pass > 2, offlining,
+						sync);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -941,7 +944,8 @@ out:
 }
 
 int migrate_huge_pages(struct list_head *from,
-		new_page_t get_new_page, unsigned long private, int offlining)
+		new_page_t get_new_page, unsigned long private, bool offlining,
+		bool sync)
 {
 	int retry = 1;
 	int nr_failed = 0;
@@ -957,7 +961,8 @@ int migrate_huge_pages(struct list_head *from,
 			cond_resched();
 
 			rc = unmap_and_move_huge_page(get_new_page,
-					private, page, pass > 2, offlining);
+					private, page, pass > 2, offlining,
+					sync);
 
 			switch(rc) {
 			case -ENOMEM:
@@ -1042,7 +1047,7 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
 		if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
 			goto set_status;
 
-		page = follow_page(vma, pp->addr, FOLL_GET);
+		page = follow_page(vma, pp->addr, FOLL_GET|FOLL_SPLIT);
 
 		err = PTR_ERR(page);
 		if (IS_ERR(page))
@@ -1090,7 +1095,7 @@ set_status:
 	err = 0;
 	if (!list_empty(&pagelist)) {
 		err = migrate_pages(&pagelist, new_page_node,
-				(unsigned long)pm, 0);
+				(unsigned long)pm, 0, true);
 		if (err)
 			putback_lru_pages(&pagelist);
 	}
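
Note on the interface change above: migrate_pages() and migrate_huge_pages() now take explicit bool offlining and bool sync arguments, so every caller has to be updated along with the in-tree one shown in the last hunk. The sketch below is illustrative only; alloc_target_page() and migrate_my_list() are hypothetical names, not part of this diff, and it assumes the post-patch signature together with the existing new_page_t callback type and putback_lru_pages() helper.

#include <linux/migrate.h>
#include <linux/gfp.h>
#include <linux/mm.h>

/* Hypothetical new_page_t callback: allocate a destination page. */
static struct page *alloc_target_page(struct page *page, unsigned long private,
				      int **result)
{
	return alloc_page(GFP_HIGHUSER_MOVABLE);
}

/* Hypothetical caller of the updated interface. */
static int migrate_my_list(struct list_head *pagelist)
{
	int err;

	/*
	 * offlining=false: this is not a memory hot-remove caller;
	 * sync=true: allowed to block on page lock and writeback.
	 */
	err = migrate_pages(pagelist, alloc_target_page, 0, false, true);
	if (err)
		putback_lru_pages(pagelist);
	return err;
}

A caller that must not block (for example an asynchronous direct-compaction path) would instead pass sync=false, which with this patch also skips lock_page() and writeback waits inside unmap_and_move().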