Diffstat (limited to 'mm/rmap.c')
-rw-r--r-- | mm/rmap.c | 589 |
1 files changed, 261 insertions, 328 deletions
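This diff converts the reverse-map users in mm/rmap.c — page_referenced(), page_mkclean(), try_to_unmap() and try_to_munlock() — from their per-type helpers (page_referenced_anon/_file, try_to_unmap_anon/_file) to the generic rmap_walk() driven by a struct rmap_walk_control. As a reading aid only, the sketch below reconstructs that control structure from the callbacks this diff assigns; the authoritative definition lives in include/linux/rmap.h, which is not part of this diff, so exact member order and types are an assumption.

/*
 * Reconstructed for reference from the callbacks used in this diff; the
 * real definition is in include/linux/rmap.h (not shown here).
 */
struct rmap_walk_control {
        void *arg;      /* passed through to each callback below */
        /* visit one vma mapping the page; SWAP_AGAIN means "keep walking" */
        int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
                        unsigned long addr, void *arg);
        /* optional: stop the walk early once the page is fully handled */
        int (*done)(struct page *page);
        /* optional: handle vmas on mapping->i_mmap_nonlinear */
        int (*file_nonlinear)(struct page *page, struct address_space *mapping,
                              void *arg);
        /* optional: take the anon_vma lock (e.g. page_lock_anon_vma_read) */
        struct anon_vma *(*anon_lock)(struct page *page);
        /* optional: skip vmas the walk should not visit */
        bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
};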
@@ -660,17 +660,22 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) | |||
660 | return 1; | 660 | return 1; |
661 | } | 661 | } |
662 | 662 | ||
663 | struct page_referenced_arg { | ||
664 | int mapcount; | ||
665 | int referenced; | ||
666 | unsigned long vm_flags; | ||
667 | struct mem_cgroup *memcg; | ||
668 | }; | ||
663 | /* | 669 | /* |
664 | * Subfunctions of page_referenced: page_referenced_one called | 670 | * arg: page_referenced_arg will be passed |
665 | * repeatedly from either page_referenced_anon or page_referenced_file. | ||
666 | */ | 671 | */ |
667 | int page_referenced_one(struct page *page, struct vm_area_struct *vma, | 672 | int page_referenced_one(struct page *page, struct vm_area_struct *vma, |
668 | unsigned long address, unsigned int *mapcount, | 673 | unsigned long address, void *arg) |
669 | unsigned long *vm_flags) | ||
670 | { | 674 | { |
671 | struct mm_struct *mm = vma->vm_mm; | 675 | struct mm_struct *mm = vma->vm_mm; |
672 | spinlock_t *ptl; | 676 | spinlock_t *ptl; |
673 | int referenced = 0; | 677 | int referenced = 0; |
678 | struct page_referenced_arg *pra = arg; | ||
674 | 679 | ||
675 | if (unlikely(PageTransHuge(page))) { | 680 | if (unlikely(PageTransHuge(page))) { |
676 | pmd_t *pmd; | 681 | pmd_t *pmd; |
@@ -682,13 +687,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, | |||
682 | pmd = page_check_address_pmd(page, mm, address, | 687 | pmd = page_check_address_pmd(page, mm, address, |
683 | PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl); | 688 | PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl); |
684 | if (!pmd) | 689 | if (!pmd) |
685 | goto out; | 690 | return SWAP_AGAIN; |
686 | 691 | ||
687 | if (vma->vm_flags & VM_LOCKED) { | 692 | if (vma->vm_flags & VM_LOCKED) { |
688 | spin_unlock(ptl); | 693 | spin_unlock(ptl); |
689 | *mapcount = 0; /* break early from loop */ | 694 | pra->vm_flags |= VM_LOCKED; |
690 | *vm_flags |= VM_LOCKED; | 695 | return SWAP_FAIL; /* To break the loop */ |
691 | goto out; | ||
692 | } | 696 | } |
693 | 697 | ||
694 | /* go ahead even if the pmd is pmd_trans_splitting() */ | 698 | /* go ahead even if the pmd is pmd_trans_splitting() */ |
@@ -704,13 +708,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, | |||
704 | */ | 708 | */ |
705 | pte = page_check_address(page, mm, address, &ptl, 0); | 709 | pte = page_check_address(page, mm, address, &ptl, 0); |
706 | if (!pte) | 710 | if (!pte) |
707 | goto out; | 711 | return SWAP_AGAIN; |
708 | 712 | ||
709 | if (vma->vm_flags & VM_LOCKED) { | 713 | if (vma->vm_flags & VM_LOCKED) { |
710 | pte_unmap_unlock(pte, ptl); | 714 | pte_unmap_unlock(pte, ptl); |
711 | *mapcount = 0; /* break early from loop */ | 715 | pra->vm_flags |= VM_LOCKED; |
712 | *vm_flags |= VM_LOCKED; | 716 | return SWAP_FAIL; /* To break the loop */ |
713 | goto out; | ||
714 | } | 717 | } |
715 | 718 | ||
716 | if (ptep_clear_flush_young_notify(vma, address, pte)) { | 719 | if (ptep_clear_flush_young_notify(vma, address, pte)) { |
@@ -727,113 +730,27 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, | |||
727 | pte_unmap_unlock(pte, ptl); | 730 | pte_unmap_unlock(pte, ptl); |
728 | } | 731 | } |
729 | 732 | ||
730 | (*mapcount)--; | 733 | if (referenced) { |
731 | 734 | pra->referenced++; | |
732 | if (referenced) | 735 | pra->vm_flags |= vma->vm_flags; |
733 | *vm_flags |= vma->vm_flags; | ||
734 | out: | ||
735 | return referenced; | ||
736 | } | ||
737 | |||
738 | static int page_referenced_anon(struct page *page, | ||
739 | struct mem_cgroup *memcg, | ||
740 | unsigned long *vm_flags) | ||
741 | { | ||
742 | unsigned int mapcount; | ||
743 | struct anon_vma *anon_vma; | ||
744 | pgoff_t pgoff; | ||
745 | struct anon_vma_chain *avc; | ||
746 | int referenced = 0; | ||
747 | |||
748 | anon_vma = page_lock_anon_vma_read(page); | ||
749 | if (!anon_vma) | ||
750 | return referenced; | ||
751 | |||
752 | mapcount = page_mapcount(page); | ||
753 | pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
754 | anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { | ||
755 | struct vm_area_struct *vma = avc->vma; | ||
756 | unsigned long address = vma_address(page, vma); | ||
757 | /* | ||
758 | * If we are reclaiming on behalf of a cgroup, skip | ||
759 | * counting on behalf of references from different | ||
760 | * cgroups | ||
761 | */ | ||
762 | if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) | ||
763 | continue; | ||
764 | referenced += page_referenced_one(page, vma, address, | ||
765 | &mapcount, vm_flags); | ||
766 | if (!mapcount) | ||
767 | break; | ||
768 | } | 736 | } |
769 | 737 | ||
770 | page_unlock_anon_vma_read(anon_vma); | 738 | pra->mapcount--; |
771 | return referenced; | 739 | if (!pra->mapcount) |
740 | return SWAP_SUCCESS; /* To break the loop */ | ||
741 | |||
742 | return SWAP_AGAIN; | ||
772 | } | 743 | } |
773 | 744 | ||
774 | /** | 745 | static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg) |
775 | * page_referenced_file - referenced check for object-based rmap | ||
776 | * @page: the page we're checking references on. | ||
777 | * @memcg: target memory control group | ||
778 | * @vm_flags: collect encountered vma->vm_flags who actually referenced the page | ||
779 | * | ||
780 | * For an object-based mapped page, find all the places it is mapped and | ||
781 | * check/clear the referenced flag. This is done by following the page->mapping | ||
782 | * pointer, then walking the chain of vmas it holds. It returns the number | ||
783 | * of references it found. | ||
784 | * | ||
785 | * This function is only called from page_referenced for object-based pages. | ||
786 | */ | ||
787 | static int page_referenced_file(struct page *page, | ||
788 | struct mem_cgroup *memcg, | ||
789 | unsigned long *vm_flags) | ||
790 | { | 746 | { |
791 | unsigned int mapcount; | 747 | struct page_referenced_arg *pra = arg; |
792 | struct address_space *mapping = page->mapping; | 748 | struct mem_cgroup *memcg = pra->memcg; |
793 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
794 | struct vm_area_struct *vma; | ||
795 | int referenced = 0; | ||
796 | |||
797 | /* | ||
798 | * The caller's checks on page->mapping and !PageAnon have made | ||
799 | * sure that this is a file page: the check for page->mapping | ||
800 | * excludes the case just before it gets set on an anon page. | ||
801 | */ | ||
802 | BUG_ON(PageAnon(page)); | ||
803 | |||
804 | /* | ||
805 | * The page lock not only makes sure that page->mapping cannot | ||
806 | * suddenly be NULLified by truncation, it makes sure that the | ||
807 | * structure at mapping cannot be freed and reused yet, | ||
808 | * so we can safely take mapping->i_mmap_mutex. | ||
809 | */ | ||
810 | BUG_ON(!PageLocked(page)); | ||
811 | |||
812 | mutex_lock(&mapping->i_mmap_mutex); | ||
813 | 749 | ||
814 | /* | 750 | if (!mm_match_cgroup(vma->vm_mm, memcg)) |
815 | * i_mmap_mutex does not stabilize mapcount at all, but mapcount | 751 | return true; |
816 | * is more likely to be accurate if we note it after spinning. | ||
817 | */ | ||
818 | mapcount = page_mapcount(page); | ||
819 | |||
820 | vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { | ||
821 | unsigned long address = vma_address(page, vma); | ||
822 | /* | ||
823 | * If we are reclaiming on behalf of a cgroup, skip | ||
824 | * counting on behalf of references from different | ||
825 | * cgroups | ||
826 | */ | ||
827 | if (memcg && !mm_match_cgroup(vma->vm_mm, memcg)) | ||
828 | continue; | ||
829 | referenced += page_referenced_one(page, vma, address, | ||
830 | &mapcount, vm_flags); | ||
831 | if (!mapcount) | ||
832 | break; | ||
833 | } | ||
834 | 752 | ||
835 | mutex_unlock(&mapping->i_mmap_mutex); | 753 | return false; |
836 | return referenced; | ||
837 | } | 754 | } |
838 | 755 | ||
839 | /** | 756 | /** |
@@ -851,41 +768,57 @@ int page_referenced(struct page *page, | |||
851 | struct mem_cgroup *memcg, | 768 | struct mem_cgroup *memcg, |
852 | unsigned long *vm_flags) | 769 | unsigned long *vm_flags) |
853 | { | 770 | { |
854 | int referenced = 0; | 771 | int ret; |
855 | int we_locked = 0; | 772 | int we_locked = 0; |
773 | struct page_referenced_arg pra = { | ||
774 | .mapcount = page_mapcount(page), | ||
775 | .memcg = memcg, | ||
776 | }; | ||
777 | struct rmap_walk_control rwc = { | ||
778 | .rmap_one = page_referenced_one, | ||
779 | .arg = (void *)&pra, | ||
780 | .anon_lock = page_lock_anon_vma_read, | ||
781 | }; | ||
856 | 782 | ||
857 | *vm_flags = 0; | 783 | *vm_flags = 0; |
858 | if (page_mapped(page) && page_rmapping(page)) { | 784 | if (!page_mapped(page)) |
859 | if (!is_locked && (!PageAnon(page) || PageKsm(page))) { | 785 | return 0; |
860 | we_locked = trylock_page(page); | 786 | |
861 | if (!we_locked) { | 787 | if (!page_rmapping(page)) |
862 | referenced++; | 788 | return 0; |
863 | goto out; | 789 | |
864 | } | 790 | if (!is_locked && (!PageAnon(page) || PageKsm(page))) { |
865 | } | 791 | we_locked = trylock_page(page); |
866 | if (unlikely(PageKsm(page))) | 792 | if (!we_locked) |
867 | referenced += page_referenced_ksm(page, memcg, | 793 | return 1; |
868 | vm_flags); | ||
869 | else if (PageAnon(page)) | ||
870 | referenced += page_referenced_anon(page, memcg, | ||
871 | vm_flags); | ||
872 | else if (page->mapping) | ||
873 | referenced += page_referenced_file(page, memcg, | ||
874 | vm_flags); | ||
875 | if (we_locked) | ||
876 | unlock_page(page); | ||
877 | } | 794 | } |
878 | out: | 795 | |
879 | return referenced; | 796 | /* |
797 | * If we are reclaiming on behalf of a cgroup, skip | ||
798 | * counting on behalf of references from different | ||
799 | * cgroups | ||
800 | */ | ||
801 | if (memcg) { | ||
802 | rwc.invalid_vma = invalid_page_referenced_vma; | ||
803 | } | ||
804 | |||
805 | ret = rmap_walk(page, &rwc); | ||
806 | *vm_flags = pra.vm_flags; | ||
807 | |||
808 | if (we_locked) | ||
809 | unlock_page(page); | ||
810 | |||
811 | return pra.referenced; | ||
880 | } | 812 | } |
881 | 813 | ||
882 | static int page_mkclean_one(struct page *page, struct vm_area_struct *vma, | 814 | static int page_mkclean_one(struct page *page, struct vm_area_struct *vma, |
883 | unsigned long address) | 815 | unsigned long address, void *arg) |
884 | { | 816 | { |
885 | struct mm_struct *mm = vma->vm_mm; | 817 | struct mm_struct *mm = vma->vm_mm; |
886 | pte_t *pte; | 818 | pte_t *pte; |
887 | spinlock_t *ptl; | 819 | spinlock_t *ptl; |
888 | int ret = 0; | 820 | int ret = 0; |
821 | int *cleaned = arg; | ||
889 | 822 | ||
890 | pte = page_check_address(page, mm, address, &ptl, 1); | 823 | pte = page_check_address(page, mm, address, &ptl, 1); |
891 | if (!pte) | 824 | if (!pte) |
@@ -904,44 +837,44 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma, | |||
904 | 837 | ||
905 | pte_unmap_unlock(pte, ptl); | 838 | pte_unmap_unlock(pte, ptl); |
906 | 839 | ||
907 | if (ret) | 840 | if (ret) { |
908 | mmu_notifier_invalidate_page(mm, address); | 841 | mmu_notifier_invalidate_page(mm, address); |
842 | (*cleaned)++; | ||
843 | } | ||
909 | out: | 844 | out: |
910 | return ret; | 845 | return SWAP_AGAIN; |
911 | } | 846 | } |
912 | 847 | ||
913 | static int page_mkclean_file(struct address_space *mapping, struct page *page) | 848 | static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg) |
914 | { | 849 | { |
915 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 850 | if (vma->vm_flags & VM_SHARED) |
916 | struct vm_area_struct *vma; | 851 | return false; |
917 | int ret = 0; | ||
918 | |||
919 | BUG_ON(PageAnon(page)); | ||
920 | 852 | ||
921 | mutex_lock(&mapping->i_mmap_mutex); | 853 | return true; |
922 | vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { | ||
923 | if (vma->vm_flags & VM_SHARED) { | ||
924 | unsigned long address = vma_address(page, vma); | ||
925 | ret += page_mkclean_one(page, vma, address); | ||
926 | } | ||
927 | } | ||
928 | mutex_unlock(&mapping->i_mmap_mutex); | ||
929 | return ret; | ||
930 | } | 854 | } |
931 | 855 | ||
932 | int page_mkclean(struct page *page) | 856 | int page_mkclean(struct page *page) |
933 | { | 857 | { |
934 | int ret = 0; | 858 | int cleaned = 0; |
859 | struct address_space *mapping; | ||
860 | struct rmap_walk_control rwc = { | ||
861 | .arg = (void *)&cleaned, | ||
862 | .rmap_one = page_mkclean_one, | ||
863 | .invalid_vma = invalid_mkclean_vma, | ||
864 | }; | ||
935 | 865 | ||
936 | BUG_ON(!PageLocked(page)); | 866 | BUG_ON(!PageLocked(page)); |
937 | 867 | ||
938 | if (page_mapped(page)) { | 868 | if (!page_mapped(page)) |
939 | struct address_space *mapping = page_mapping(page); | 869 | return 0; |
940 | if (mapping) | ||
941 | ret = page_mkclean_file(mapping, page); | ||
942 | } | ||
943 | 870 | ||
944 | return ret; | 871 | mapping = page_mapping(page); |
872 | if (!mapping) | ||
873 | return 0; | ||
874 | |||
875 | rmap_walk(page, &rwc); | ||
876 | |||
877 | return cleaned; | ||
945 | } | 878 | } |
946 | EXPORT_SYMBOL_GPL(page_mkclean); | 879 | EXPORT_SYMBOL_GPL(page_mkclean); |
947 | 880 | ||
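The page_mkclean() conversion above shows the minimal pattern for a new walker: a per-walk argument, an rmap_one callback, and an optional invalid_vma filter. As a hypothetical illustration only (none of the identifiers below exist in the kernel), the same pattern could count executable mappings of a page; for file pages the caller must hold the page lock, as rmap_walk_file() asserts.

/* Hypothetical example, not part of this patch: count VM_EXEC mappings. */
struct count_exec_arg {
        int count;
};

static int count_exec_one(struct page *page, struct vm_area_struct *vma,
                          unsigned long address, void *arg)
{
        struct count_exec_arg *cea = arg;

        if (vma->vm_flags & VM_EXEC)
                cea->count++;

        return SWAP_AGAIN;      /* visit every mapping */
}

static int count_exec_mappings(struct page *page)
{
        struct count_exec_arg cea = { .count = 0 };
        struct rmap_walk_control rwc = {
                .rmap_one = count_exec_one,
                .arg = (void *)&cea,
                .anon_lock = page_lock_anon_vma_read,
        };

        if (!page_mapped(page))
                return 0;

        rmap_walk(page, &rwc);
        return cea.count;
}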
@@ -961,9 +894,9 @@ void page_move_anon_rmap(struct page *page, | |||
961 | { | 894 | { |
962 | struct anon_vma *anon_vma = vma->anon_vma; | 895 | struct anon_vma *anon_vma = vma->anon_vma; |
963 | 896 | ||
964 | VM_BUG_ON(!PageLocked(page)); | 897 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
965 | VM_BUG_ON(!anon_vma); | 898 | VM_BUG_ON(!anon_vma); |
966 | VM_BUG_ON(page->index != linear_page_index(vma, address)); | 899 | VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page); |
967 | 900 | ||
968 | anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; | 901 | anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; |
969 | page->mapping = (struct address_space *) anon_vma; | 902 | page->mapping = (struct address_space *) anon_vma; |
@@ -1062,7 +995,7 @@ void do_page_add_anon_rmap(struct page *page, | |||
1062 | if (unlikely(PageKsm(page))) | 995 | if (unlikely(PageKsm(page))) |
1063 | return; | 996 | return; |
1064 | 997 | ||
1065 | VM_BUG_ON(!PageLocked(page)); | 998 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
1066 | /* address might be in next vma when migration races vma_adjust */ | 999 | /* address might be in next vma when migration races vma_adjust */ |
1067 | if (first) | 1000 | if (first) |
1068 | __page_set_anon_rmap(page, vma, address, exclusive); | 1001 | __page_set_anon_rmap(page, vma, address, exclusive); |
@@ -1177,17 +1110,17 @@ out: | |||
1177 | } | 1110 | } |
1178 | 1111 | ||
1179 | /* | 1112 | /* |
1180 | * Subfunctions of try_to_unmap: try_to_unmap_one called | 1113 | * @arg: enum ttu_flags will be passed to this argument |
1181 | * repeatedly from try_to_unmap_ksm, try_to_unmap_anon or try_to_unmap_file. | ||
1182 | */ | 1114 | */ |
1183 | int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, | 1115 | int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, |
1184 | unsigned long address, enum ttu_flags flags) | 1116 | unsigned long address, void *arg) |
1185 | { | 1117 | { |
1186 | struct mm_struct *mm = vma->vm_mm; | 1118 | struct mm_struct *mm = vma->vm_mm; |
1187 | pte_t *pte; | 1119 | pte_t *pte; |
1188 | pte_t pteval; | 1120 | pte_t pteval; |
1189 | spinlock_t *ptl; | 1121 | spinlock_t *ptl; |
1190 | int ret = SWAP_AGAIN; | 1122 | int ret = SWAP_AGAIN; |
1123 | enum ttu_flags flags = (enum ttu_flags)arg; | ||
1191 | 1124 | ||
1192 | pte = page_check_address(page, mm, address, &ptl, 0); | 1125 | pte = page_check_address(page, mm, address, &ptl, 0); |
1193 | if (!pte) | 1126 | if (!pte) |
@@ -1426,93 +1359,9 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, | |||
1426 | return ret; | 1359 | return ret; |
1427 | } | 1360 | } |
1428 | 1361 | ||
1429 | bool is_vma_temporary_stack(struct vm_area_struct *vma) | 1362 | static int try_to_unmap_nonlinear(struct page *page, |
1430 | { | 1363 | struct address_space *mapping, void *arg) |
1431 | int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); | ||
1432 | |||
1433 | if (!maybe_stack) | ||
1434 | return false; | ||
1435 | |||
1436 | if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) == | ||
1437 | VM_STACK_INCOMPLETE_SETUP) | ||
1438 | return true; | ||
1439 | |||
1440 | return false; | ||
1441 | } | ||
1442 | |||
1443 | /** | ||
1444 | * try_to_unmap_anon - unmap or unlock anonymous page using the object-based | ||
1445 | * rmap method | ||
1446 | * @page: the page to unmap/unlock | ||
1447 | * @flags: action and flags | ||
1448 | * | ||
1449 | * Find all the mappings of a page using the mapping pointer and the vma chains | ||
1450 | * contained in the anon_vma struct it points to. | ||
1451 | * | ||
1452 | * This function is only called from try_to_unmap/try_to_munlock for | ||
1453 | * anonymous pages. | ||
1454 | * When called from try_to_munlock(), the mmap_sem of the mm containing the vma | ||
1455 | * where the page was found will be held for write. So, we won't recheck | ||
1456 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be | ||
1457 | * 'LOCKED. | ||
1458 | */ | ||
1459 | static int try_to_unmap_anon(struct page *page, enum ttu_flags flags) | ||
1460 | { | ||
1461 | struct anon_vma *anon_vma; | ||
1462 | pgoff_t pgoff; | ||
1463 | struct anon_vma_chain *avc; | ||
1464 | int ret = SWAP_AGAIN; | ||
1465 | |||
1466 | anon_vma = page_lock_anon_vma_read(page); | ||
1467 | if (!anon_vma) | ||
1468 | return ret; | ||
1469 | |||
1470 | pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
1471 | anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { | ||
1472 | struct vm_area_struct *vma = avc->vma; | ||
1473 | unsigned long address; | ||
1474 | |||
1475 | /* | ||
1476 | * During exec, a temporary VMA is setup and later moved. | ||
1477 | * The VMA is moved under the anon_vma lock but not the | ||
1478 | * page tables leading to a race where migration cannot | ||
1479 | * find the migration ptes. Rather than increasing the | ||
1480 | * locking requirements of exec(), migration skips | ||
1481 | * temporary VMAs until after exec() completes. | ||
1482 | */ | ||
1483 | if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) && | ||
1484 | is_vma_temporary_stack(vma)) | ||
1485 | continue; | ||
1486 | |||
1487 | address = vma_address(page, vma); | ||
1488 | ret = try_to_unmap_one(page, vma, address, flags); | ||
1489 | if (ret != SWAP_AGAIN || !page_mapped(page)) | ||
1490 | break; | ||
1491 | } | ||
1492 | |||
1493 | page_unlock_anon_vma_read(anon_vma); | ||
1494 | return ret; | ||
1495 | } | ||
1496 | |||
1497 | /** | ||
1498 | * try_to_unmap_file - unmap/unlock file page using the object-based rmap method | ||
1499 | * @page: the page to unmap/unlock | ||
1500 | * @flags: action and flags | ||
1501 | * | ||
1502 | * Find all the mappings of a page using the mapping pointer and the vma chains | ||
1503 | * contained in the address_space struct it points to. | ||
1504 | * | ||
1505 | * This function is only called from try_to_unmap/try_to_munlock for | ||
1506 | * object-based pages. | ||
1507 | * When called from try_to_munlock(), the mmap_sem of the mm containing the vma | ||
1508 | * where the page was found will be held for write. So, we won't recheck | ||
1509 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be | ||
1510 | * 'LOCKED. | ||
1511 | */ | ||
1512 | static int try_to_unmap_file(struct page *page, enum ttu_flags flags) | ||
1513 | { | 1364 | { |
1514 | struct address_space *mapping = page->mapping; | ||
1515 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
1516 | struct vm_area_struct *vma; | 1365 | struct vm_area_struct *vma; |
1517 | int ret = SWAP_AGAIN; | 1366 | int ret = SWAP_AGAIN; |
1518 | unsigned long cursor; | 1367 | unsigned long cursor; |
@@ -1520,30 +1369,9 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) | |||
1520 | unsigned long max_nl_size = 0; | 1369 | unsigned long max_nl_size = 0; |
1521 | unsigned int mapcount; | 1370 | unsigned int mapcount; |
1522 | 1371 | ||
1523 | if (PageHuge(page)) | 1372 | list_for_each_entry(vma, |
1524 | pgoff = page->index << compound_order(page); | 1373 | &mapping->i_mmap_nonlinear, shared.nonlinear) { |
1525 | 1374 | ||
1526 | mutex_lock(&mapping->i_mmap_mutex); | ||
1527 | vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { | ||
1528 | unsigned long address = vma_address(page, vma); | ||
1529 | ret = try_to_unmap_one(page, vma, address, flags); | ||
1530 | if (ret != SWAP_AGAIN || !page_mapped(page)) | ||
1531 | goto out; | ||
1532 | } | ||
1533 | |||
1534 | if (list_empty(&mapping->i_mmap_nonlinear)) | ||
1535 | goto out; | ||
1536 | |||
1537 | /* | ||
1538 | * We don't bother to try to find the munlocked page in nonlinears. | ||
1539 | * It's costly. Instead, later, page reclaim logic may call | ||
1540 | * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily. | ||
1541 | */ | ||
1542 | if (TTU_ACTION(flags) == TTU_MUNLOCK) | ||
1543 | goto out; | ||
1544 | |||
1545 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, | ||
1546 | shared.nonlinear) { | ||
1547 | cursor = (unsigned long) vma->vm_private_data; | 1375 | cursor = (unsigned long) vma->vm_private_data; |
1548 | if (cursor > max_nl_cursor) | 1376 | if (cursor > max_nl_cursor) |
1549 | max_nl_cursor = cursor; | 1377 | max_nl_cursor = cursor; |
@@ -1553,8 +1381,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) | |||
1553 | } | 1381 | } |
1554 | 1382 | ||
1555 | if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */ | 1383 | if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */ |
1556 | ret = SWAP_FAIL; | 1384 | return SWAP_FAIL; |
1557 | goto out; | ||
1558 | } | 1385 | } |
1559 | 1386 | ||
1560 | /* | 1387 | /* |
@@ -1566,7 +1393,8 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) | |||
1566 | */ | 1393 | */ |
1567 | mapcount = page_mapcount(page); | 1394 | mapcount = page_mapcount(page); |
1568 | if (!mapcount) | 1395 | if (!mapcount) |
1569 | goto out; | 1396 | return ret; |
1397 | |||
1570 | cond_resched(); | 1398 | cond_resched(); |
1571 | 1399 | ||
1572 | max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK; | 1400 | max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK; |
@@ -1574,10 +1402,11 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) | |||
1574 | max_nl_cursor = CLUSTER_SIZE; | 1402 | max_nl_cursor = CLUSTER_SIZE; |
1575 | 1403 | ||
1576 | do { | 1404 | do { |
1577 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, | 1405 | list_for_each_entry(vma, |
1578 | shared.nonlinear) { | 1406 | &mapping->i_mmap_nonlinear, shared.nonlinear) { |
1407 | |||
1579 | cursor = (unsigned long) vma->vm_private_data; | 1408 | cursor = (unsigned long) vma->vm_private_data; |
1580 | while ( cursor < max_nl_cursor && | 1409 | while (cursor < max_nl_cursor && |
1581 | cursor < vma->vm_end - vma->vm_start) { | 1410 | cursor < vma->vm_end - vma->vm_start) { |
1582 | if (try_to_unmap_cluster(cursor, &mapcount, | 1411 | if (try_to_unmap_cluster(cursor, &mapcount, |
1583 | vma, page) == SWAP_MLOCK) | 1412 | vma, page) == SWAP_MLOCK) |
@@ -1585,7 +1414,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) | |||
1585 | cursor += CLUSTER_SIZE; | 1414 | cursor += CLUSTER_SIZE; |
1586 | vma->vm_private_data = (void *) cursor; | 1415 | vma->vm_private_data = (void *) cursor; |
1587 | if ((int)mapcount <= 0) | 1416 | if ((int)mapcount <= 0) |
1588 | goto out; | 1417 | return ret; |
1589 | } | 1418 | } |
1590 | vma->vm_private_data = (void *) max_nl_cursor; | 1419 | vma->vm_private_data = (void *) max_nl_cursor; |
1591 | } | 1420 | } |
@@ -1600,11 +1429,34 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags) | |||
1600 | */ | 1429 | */ |
1601 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear) | 1430 | list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear) |
1602 | vma->vm_private_data = NULL; | 1431 | vma->vm_private_data = NULL; |
1603 | out: | 1432 | |
1604 | mutex_unlock(&mapping->i_mmap_mutex); | ||
1605 | return ret; | 1433 | return ret; |
1606 | } | 1434 | } |
1607 | 1435 | ||
1436 | bool is_vma_temporary_stack(struct vm_area_struct *vma) | ||
1437 | { | ||
1438 | int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP); | ||
1439 | |||
1440 | if (!maybe_stack) | ||
1441 | return false; | ||
1442 | |||
1443 | if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) == | ||
1444 | VM_STACK_INCOMPLETE_SETUP) | ||
1445 | return true; | ||
1446 | |||
1447 | return false; | ||
1448 | } | ||
1449 | |||
1450 | static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg) | ||
1451 | { | ||
1452 | return is_vma_temporary_stack(vma); | ||
1453 | } | ||
1454 | |||
1455 | static int page_not_mapped(struct page *page) | ||
1456 | { | ||
1457 | return !page_mapped(page); | ||
1458 | }; | ||
1459 | |||
1608 | /** | 1460 | /** |
1609 | * try_to_unmap - try to remove all page table mappings to a page | 1461 | * try_to_unmap - try to remove all page table mappings to a page |
1610 | * @page: the page to get unmapped | 1462 | * @page: the page to get unmapped |
@@ -1622,16 +1474,29 @@ out: | |||
1622 | int try_to_unmap(struct page *page, enum ttu_flags flags) | 1474 | int try_to_unmap(struct page *page, enum ttu_flags flags) |
1623 | { | 1475 | { |
1624 | int ret; | 1476 | int ret; |
1477 | struct rmap_walk_control rwc = { | ||
1478 | .rmap_one = try_to_unmap_one, | ||
1479 | .arg = (void *)flags, | ||
1480 | .done = page_not_mapped, | ||
1481 | .file_nonlinear = try_to_unmap_nonlinear, | ||
1482 | .anon_lock = page_lock_anon_vma_read, | ||
1483 | }; | ||
1625 | 1484 | ||
1626 | BUG_ON(!PageLocked(page)); | 1485 | VM_BUG_ON_PAGE(!PageHuge(page) && PageTransHuge(page), page); |
1627 | VM_BUG_ON(!PageHuge(page) && PageTransHuge(page)); | 1486 | |
1487 | /* | ||
1488 | * During exec, a temporary VMA is setup and later moved. | ||
1489 | * The VMA is moved under the anon_vma lock but not the | ||
1490 | * page tables leading to a race where migration cannot | ||
1491 | * find the migration ptes. Rather than increasing the | ||
1492 | * locking requirements of exec(), migration skips | ||
1493 | * temporary VMAs until after exec() completes. | ||
1494 | */ | ||
1495 | if (flags & TTU_MIGRATION && !PageKsm(page) && PageAnon(page)) | ||
1496 | rwc.invalid_vma = invalid_migration_vma; | ||
1497 | |||
1498 | ret = rmap_walk(page, &rwc); | ||
1628 | 1499 | ||
1629 | if (unlikely(PageKsm(page))) | ||
1630 | ret = try_to_unmap_ksm(page, flags); | ||
1631 | else if (PageAnon(page)) | ||
1632 | ret = try_to_unmap_anon(page, flags); | ||
1633 | else | ||
1634 | ret = try_to_unmap_file(page, flags); | ||
1635 | if (ret != SWAP_MLOCK && !page_mapped(page)) | 1500 | if (ret != SWAP_MLOCK && !page_mapped(page)) |
1636 | ret = SWAP_SUCCESS; | 1501 | ret = SWAP_SUCCESS; |
1637 | return ret; | 1502 | return ret; |
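try_to_unmap() keeps its external contract; only the internal walk changes in this diff. A hypothetical caller (the function name below is illustrative, not from this patch) still interprets the existing SWAP_* results the same way:

/* Hypothetical caller sketch, not part of this patch. */
static bool example_unmap_page(struct page *page)
{
        switch (try_to_unmap(page, TTU_UNMAP)) {
        case SWAP_SUCCESS:
                return true;    /* every pte mapping the page was removed */
        case SWAP_MLOCK:        /* page is mlocked in some vma */
        case SWAP_AGAIN:        /* could not finish, try again later */
        case SWAP_FAIL:
        default:
                return false;
        }
}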
@@ -1654,14 +1519,25 @@ int try_to_unmap(struct page *page, enum ttu_flags flags) | |||
1654 | */ | 1519 | */ |
1655 | int try_to_munlock(struct page *page) | 1520 | int try_to_munlock(struct page *page) |
1656 | { | 1521 | { |
1657 | VM_BUG_ON(!PageLocked(page) || PageLRU(page)); | 1522 | int ret; |
1523 | struct rmap_walk_control rwc = { | ||
1524 | .rmap_one = try_to_unmap_one, | ||
1525 | .arg = (void *)TTU_MUNLOCK, | ||
1526 | .done = page_not_mapped, | ||
1527 | /* | ||
1528 | * We don't bother to try to find the munlocked page in | ||
1529 | * nonlinears. It's costly. Instead, later, page reclaim logic | ||
1530 | * may call try_to_unmap() and recover PG_mlocked lazily. | ||
1531 | */ | ||
1532 | .file_nonlinear = NULL, | ||
1533 | .anon_lock = page_lock_anon_vma_read, | ||
1658 | 1534 | ||
1659 | if (unlikely(PageKsm(page))) | 1535 | }; |
1660 | return try_to_unmap_ksm(page, TTU_MUNLOCK); | 1536 | |
1661 | else if (PageAnon(page)) | 1537 | VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page); |
1662 | return try_to_unmap_anon(page, TTU_MUNLOCK); | 1538 | |
1663 | else | 1539 | ret = rmap_walk(page, &rwc); |
1664 | return try_to_unmap_file(page, TTU_MUNLOCK); | 1540 | return ret; |
1665 | } | 1541 | } |
1666 | 1542 | ||
1667 | void __put_anon_vma(struct anon_vma *anon_vma) | 1543 | void __put_anon_vma(struct anon_vma *anon_vma) |
@@ -1674,18 +1550,13 @@ void __put_anon_vma(struct anon_vma *anon_vma) | |||
1674 | anon_vma_free(anon_vma); | 1550 | anon_vma_free(anon_vma); |
1675 | } | 1551 | } |
1676 | 1552 | ||
1677 | #ifdef CONFIG_MIGRATION | 1553 | static struct anon_vma *rmap_walk_anon_lock(struct page *page, |
1678 | /* | 1554 | struct rmap_walk_control *rwc) |
1679 | * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file(): | ||
1680 | * Called by migrate.c to remove migration ptes, but might be used more later. | ||
1681 | */ | ||
1682 | static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, | ||
1683 | struct vm_area_struct *, unsigned long, void *), void *arg) | ||
1684 | { | 1555 | { |
1685 | struct anon_vma *anon_vma; | 1556 | struct anon_vma *anon_vma; |
1686 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 1557 | |
1687 | struct anon_vma_chain *avc; | 1558 | if (rwc->anon_lock) |
1688 | int ret = SWAP_AGAIN; | 1559 | return rwc->anon_lock(page); |
1689 | 1560 | ||
1690 | /* | 1561 | /* |
1691 | * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read() | 1562 | * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read() |
@@ -1695,58 +1566,120 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, | |||
1695 | */ | 1566 | */ |
1696 | anon_vma = page_anon_vma(page); | 1567 | anon_vma = page_anon_vma(page); |
1697 | if (!anon_vma) | 1568 | if (!anon_vma) |
1698 | return ret; | 1569 | return NULL; |
1570 | |||
1699 | anon_vma_lock_read(anon_vma); | 1571 | anon_vma_lock_read(anon_vma); |
1572 | return anon_vma; | ||
1573 | } | ||
1574 | |||
1575 | /* | ||
1576 | * rmap_walk_anon - do something to anonymous page using the object-based | ||
1577 | * rmap method | ||
1578 | * @page: the page to be handled | ||
1579 | * @rwc: control variable according to each walk type | ||
1580 | * | ||
1581 | * Find all the mappings of a page using the mapping pointer and the vma chains | ||
1582 | * contained in the anon_vma struct it points to. | ||
1583 | * | ||
1584 | * When called from try_to_munlock(), the mmap_sem of the mm containing the vma | ||
1585 | * where the page was found will be held for write. So, we won't recheck | ||
1586 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be | ||
1587 | * LOCKED. | ||
1588 | */ | ||
1589 | static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc) | ||
1590 | { | ||
1591 | struct anon_vma *anon_vma; | ||
1592 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
1593 | struct anon_vma_chain *avc; | ||
1594 | int ret = SWAP_AGAIN; | ||
1595 | |||
1596 | anon_vma = rmap_walk_anon_lock(page, rwc); | ||
1597 | if (!anon_vma) | ||
1598 | return ret; | ||
1599 | |||
1700 | anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { | 1600 | anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { |
1701 | struct vm_area_struct *vma = avc->vma; | 1601 | struct vm_area_struct *vma = avc->vma; |
1702 | unsigned long address = vma_address(page, vma); | 1602 | unsigned long address = vma_address(page, vma); |
1703 | ret = rmap_one(page, vma, address, arg); | 1603 | |
1604 | if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) | ||
1605 | continue; | ||
1606 | |||
1607 | ret = rwc->rmap_one(page, vma, address, rwc->arg); | ||
1704 | if (ret != SWAP_AGAIN) | 1608 | if (ret != SWAP_AGAIN) |
1705 | break; | 1609 | break; |
1610 | if (rwc->done && rwc->done(page)) | ||
1611 | break; | ||
1706 | } | 1612 | } |
1707 | anon_vma_unlock_read(anon_vma); | 1613 | anon_vma_unlock_read(anon_vma); |
1708 | return ret; | 1614 | return ret; |
1709 | } | 1615 | } |
1710 | 1616 | ||
1711 | static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *, | 1617 | /* |
1712 | struct vm_area_struct *, unsigned long, void *), void *arg) | 1618 | * rmap_walk_file - do something to file page using the object-based rmap method |
1619 | * @page: the page to be handled | ||
1620 | * @rwc: control variable according to each walk type | ||
1621 | * | ||
1622 | * Find all the mappings of a page using the mapping pointer and the vma chains | ||
1623 | * contained in the address_space struct it points to. | ||
1624 | * | ||
1625 | * When called from try_to_munlock(), the mmap_sem of the mm containing the vma | ||
1626 | * where the page was found will be held for write. So, we won't recheck | ||
1627 | * vm_flags for that VMA. That should be OK, because that vma shouldn't be | ||
1628 | * LOCKED. | ||
1629 | */ | ||
1630 | static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc) | ||
1713 | { | 1631 | { |
1714 | struct address_space *mapping = page->mapping; | 1632 | struct address_space *mapping = page->mapping; |
1715 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | 1633 | pgoff_t pgoff = page->index << compound_order(page); |
1716 | struct vm_area_struct *vma; | 1634 | struct vm_area_struct *vma; |
1717 | int ret = SWAP_AGAIN; | 1635 | int ret = SWAP_AGAIN; |
1718 | 1636 | ||
1637 | /* | ||
1638 | * The page lock not only makes sure that page->mapping cannot | ||
1639 | * suddenly be NULLified by truncation, it makes sure that the | ||
1640 | * structure at mapping cannot be freed and reused yet, | ||
1641 | * so we can safely take mapping->i_mmap_mutex. | ||
1642 | */ | ||
1643 | VM_BUG_ON(!PageLocked(page)); | ||
1644 | |||
1719 | if (!mapping) | 1645 | if (!mapping) |
1720 | return ret; | 1646 | return ret; |
1721 | mutex_lock(&mapping->i_mmap_mutex); | 1647 | mutex_lock(&mapping->i_mmap_mutex); |
1722 | vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { | 1648 | vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { |
1723 | unsigned long address = vma_address(page, vma); | 1649 | unsigned long address = vma_address(page, vma); |
1724 | ret = rmap_one(page, vma, address, arg); | 1650 | |
1651 | if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) | ||
1652 | continue; | ||
1653 | |||
1654 | ret = rwc->rmap_one(page, vma, address, rwc->arg); | ||
1725 | if (ret != SWAP_AGAIN) | 1655 | if (ret != SWAP_AGAIN) |
1726 | break; | 1656 | goto done; |
1657 | if (rwc->done && rwc->done(page)) | ||
1658 | goto done; | ||
1727 | } | 1659 | } |
1728 | /* | 1660 | |
1729 | * No nonlinear handling: being always shared, nonlinear vmas | 1661 | if (!rwc->file_nonlinear) |
1730 | * never contain migration ptes. Decide what to do about this | 1662 | goto done; |
1731 | * limitation to linear when we need rmap_walk() on nonlinear. | 1663 | |
1732 | */ | 1664 | if (list_empty(&mapping->i_mmap_nonlinear)) |
1665 | goto done; | ||
1666 | |||
1667 | ret = rwc->file_nonlinear(page, mapping, rwc->arg); | ||
1668 | |||
1669 | done: | ||
1733 | mutex_unlock(&mapping->i_mmap_mutex); | 1670 | mutex_unlock(&mapping->i_mmap_mutex); |
1734 | return ret; | 1671 | return ret; |
1735 | } | 1672 | } |
1736 | 1673 | ||
1737 | int rmap_walk(struct page *page, int (*rmap_one)(struct page *, | 1674 | int rmap_walk(struct page *page, struct rmap_walk_control *rwc) |
1738 | struct vm_area_struct *, unsigned long, void *), void *arg) | ||
1739 | { | 1675 | { |
1740 | VM_BUG_ON(!PageLocked(page)); | ||
1741 | |||
1742 | if (unlikely(PageKsm(page))) | 1676 | if (unlikely(PageKsm(page))) |
1743 | return rmap_walk_ksm(page, rmap_one, arg); | 1677 | return rmap_walk_ksm(page, rwc); |
1744 | else if (PageAnon(page)) | 1678 | else if (PageAnon(page)) |
1745 | return rmap_walk_anon(page, rmap_one, arg); | 1679 | return rmap_walk_anon(page, rwc); |
1746 | else | 1680 | else |
1747 | return rmap_walk_file(page, rmap_one, arg); | 1681 | return rmap_walk_file(page, rwc); |
1748 | } | 1682 | } |
1749 | #endif /* CONFIG_MIGRATION */ | ||
1750 | 1683 | ||
1751 | #ifdef CONFIG_HUGETLB_PAGE | 1684 | #ifdef CONFIG_HUGETLB_PAGE |
1752 | /* | 1685 | /* |